# What this script does

The data we scraped from the enforcement letters include the [Washington Administrative Codes (WAC)](https://apps.leg.wa.gov/wac/default.aspx?cite=388-97) that the surveyed nursing homes were found to be out of compliance with.

In this script, we add the details of each of those codes.

# I. Settings

In [1]:
import pandas as pd
import requests
import bs4
import numpy as np
import re
import time

In [2]:
# Folder (relative to this notebook) that the export step writes its CSV files into.
output_path = '../C_output_data/'

# II. Hierarchy

## TITLE AREA
## Title
### SUBCHAPTER
### Chapter
#### Section

# III. Scraping fest

## III.1. Title 388

In [3]:
# Request the WAC index page that lists every chapter of Title 388.
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() prevents an HTTP error page from being parsed as data.
page = requests.get('https://apps.leg.wa.gov/wac/default.aspx?cite=388', timeout=30)
page.raise_for_status()
soup = bs4.BeautifulSoup(page.text, 'html.parser')

# The chapter listing is read from the first <table> on the page, one <tr> per row.
table = soup.find('table')
if table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise RuntimeError('No <table> found on the Title 388 index page; '
                       'the page layout may have changed.')
ls_tr = table.find_all('tr')

In [4]:
# Parse the rows of the Title 388 index table into one record per chapter.
# Two kinds of rows are handled:
#   * area headers  - contain a <td colspan="2">; the first <td> holds the area name
#   * chapter rows  - first <td> is the chapter cite (wrapping a link),
#                     second <td> is the chapter description
# Records are gathered in a list and the DataFrame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
records = []

for tr in ls_tr:
    if tr.find('td', {'colspan': '2'}):
        records.append({'area': tr.find('td').text,
                        'ttl_chp': np.nan,
                        'ttl_chp_link': np.nan,
                        'ttl_chp_desc': np.nan})
    else:
        cells = tr.find_all('td')  # look the cells up once per row
        records.append({'area': np.nan,
                        'ttl_chp': cells[0].text,
                        'ttl_chp_link': cells[0].find('a').get('href'),
                        'ttl_chp_desc': cells[1].text})

# Passing `columns` keeps the original column order (and yields an empty,
# correctly-shaped frame if the table had no rows).
df_t338 = pd.DataFrame(records,
                       columns=['area', 'ttl_chp', 'ttl_chp_desc', 'ttl_chp_link'])

# The area name only appears on header rows; carry it down to the chapter
# rows that follow. (.ffill() replaces the deprecated fillna(method='ffill').)
df_t338['area'] = df_t338['area'].ffill()

# Header rows carry no chapter data, so dropping every row that still has a
# NaN leaves chapter rows only.
df_t338 = df_t338.dropna(axis=0, how='any').reset_index(drop=True)

In [5]:
# Display the cleaned chapter table (one row per Title 388 chapter).
df_t338

Unnamed: 0,area,ttl_chp,ttl_chp_desc,ttl_chp_link
0,GENERAL,388-01,DSHS organization/disclosure of public records.,default.aspx?cite=388-01
1,GENERAL,388-02,DSHS hearing rules.,default.aspx?cite=388-02
2,GENERAL,388-03,Certification of DSHS spoken language interpre...,default.aspx?cite=388-03
3,GENERAL,388-04,Protection of human research subjects.,default.aspx?cite=388-04
4,GENERAL,388-05,Contractor billing requirements—General.,default.aspx?cite=388-05
...,...,...,...,...
105,SPECIAL COMMITMENT CENTER,388-880,Special commitment—Sexually violent predators.,default.aspx?cite=388-880
106,SPECIAL COMMITMENT CENTER,388-881,Sexual predator program—External oversight.,default.aspx?cite=388-881
107,SPECIAL COMMITMENT CENTER,388-885,Civil commitment cost reimbursement.,default.aspx?cite=388-885
108,VOCATIONAL REHABILITATION,388-891A,Vocational rehabilitation services for individ...,default.aspx?cite=388-891A


## III.2. Title 388 Chapter 97

In [6]:
# Request the Chapter 388-97 page, which lists the sections of that chapter.
# Rebinding `page` and `soup` releases the previous objects, so no explicit
# `del` is needed (and this cell no longer fails when run on its own).
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() prevents an HTTP error page from being parsed as data.
page = requests.get('https://apps.leg.wa.gov/wac/default.aspx?cite=388-97', timeout=30)
page.raise_for_status()
soup = bs4.BeautifulSoup(page.text, 'html.parser')

# The section listing is read from the first <table> on the page, one <tr> per row.
table = soup.find('table')
if table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise RuntimeError('No <table> found on the Chapter 388-97 page; '
                       'the page layout may have changed.')
ls_tr = table.find_all('tr')

In [7]:
# Parse the rows of the Chapter 388-97 table into one record per section.
# Two kinds of rows are handled:
#   * header rows  - contain a <td colspan="3">; their <div> children hold,
#                    depending on how many there are:
#                      3 divs -> subchapter number, subchapter name, section group
#                      2 divs -> subchapter number, subchapter name
#                      1 div  -> section group only
#   * section rows - second <td> is the section cite, third <td> its description
# Records are gathered in a list and the DataFrame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
records = []

for tr in ls_tr:

    if tr.find('td', {'colspan': '3'}):

        div_texts = [div.text for div in tr.find_all('div')]

        if len(div_texts) == 3:
            sub_chp_num, sub_chp_name, section = div_texts
        elif len(div_texts) == 2:
            sub_chp_num, sub_chp_name = div_texts
            section = np.nan
        elif len(div_texts) == 1:
            sub_chp_num = sub_chp_name = np.nan
            section = div_texts[0]
        else:
            # Unknown header layout. Previously this fell through and
            # re-appended the record left over from the prior iteration
            # (or raised NameError on the very first row); skip it instead.
            continue

        records.append({'sub_chp_num': sub_chp_num,
                        'sub_chp_name': sub_chp_name,
                        'section': section,
                        'ttl_chp_sec': np.nan,
                        'ttl_chp_sec_desc': np.nan})

    else:

        cells = tr.find_all('td')  # look the cells up once per row
        records.append({'sub_chp_num': np.nan,
                        'sub_chp_name': np.nan,
                        'section': np.nan,
                        'ttl_chp_sec': cells[1].text,
                        'ttl_chp_sec_desc': cells[2].text})

# Passing `columns` keeps the original column order (and yields an empty,
# correctly-shaped frame if the table had no rows).
df_t338c97 = pd.DataFrame(records,
                          columns=['sub_chp_num', 'sub_chp_name', 'section',
                                   'ttl_chp_sec', 'ttl_chp_sec_desc'])

# Hierarchy labels only appear on header rows; carry them down to the section
# rows that follow. (.ffill() replaces the deprecated fillna(method='ffill').)
# NOTE(review): a 2-div subchapter header does not reset 'section', so sections
# listed directly under it would inherit the previous group's label - confirm
# the page never has that layout.
for col in ['sub_chp_num', 'sub_chp_name', 'section']:
    df_t338c97[col] = df_t338c97[col].ffill()

# Header rows carry no section data, so dropping every row that still has a
# NaN leaves section rows only.
df_t338c97 = df_t338c97.dropna(axis=0, how='any').reset_index(drop=True)

In [8]:
# Display the cleaned section table (one row per Chapter 388-97 section).
df_t338c97

Unnamed: 0,sub_chp_num,sub_chp_name,section,ttl_chp_sec,ttl_chp_sec_desc
0,SUBCHAPTER I,"RESIDENT RIGHTS, CARE AND RELATED SERVICES",Definitions,388-97-0001,Definitions.
1,SUBCHAPTER I,"RESIDENT RIGHTS, CARE AND RELATED SERVICES","Admission, Transfer and Discharge",388-97-0020,Nursing facility care.
2,SUBCHAPTER I,"RESIDENT RIGHTS, CARE AND RELATED SERVICES","Admission, Transfer and Discharge",388-97-0040,Discrimination prohibited.
3,SUBCHAPTER I,"RESIDENT RIGHTS, CARE AND RELATED SERVICES","Admission, Transfer and Discharge",388-97-0060,Nursing facility admission and payment require...
4,SUBCHAPTER I,"RESIDENT RIGHTS, CARE AND RELATED SERVICES","Admission, Transfer and Discharge",388-97-0080,Discharge planning.
...,...,...,...,...,...
237,SUBCHAPTER IV,NURSING HOME LICENSURE PROGRAM ADMINISTRATION,"Licensed Bed Capacity, Relocation of Residents...",388-97-4640,Receivership.
238,SUBCHAPTER IV,NURSING HOME LICENSURE PROGRAM ADMINISTRATION,"Licensed Bed Capacity, Relocation of Residents...",388-97-4660,Temporary managers and receivers—Application.
239,SUBCHAPTER IV,NURSING HOME LICENSURE PROGRAM ADMINISTRATION,"Licensed Bed Capacity, Relocation of Residents...",388-97-4680,Temporary managers and receivers—Consideration...
240,SUBCHAPTER IV,NURSING HOME LICENSURE PROGRAM ADMINISTRATION,"Licensed Bed Capacity, Relocation of Residents...",388-97-4700,Duties and powers of temporary manager and rec...


In [9]:
# Number of sections per subchapter number; dropna=False also counts NaN, if any.
df_t338c97['sub_chp_num'].value_counts(dropna=False)

SUBCHAPTER II     105
SUBCHAPTER I      103
SUBCHAPTER IV      21
SUBCHAPTER III     13
Name: sub_chp_num, dtype: int64

In [10]:
# Number of sections per subchapter name; dropna=False also counts NaN, if any.
df_t338c97['sub_chp_name'].value_counts(dropna=False)

PHYSICAL ENVIRONMENT                             105
RESIDENT RIGHTS, CARE AND RELATED SERVICES       103
NURSING HOME LICENSURE PROGRAM ADMINISTRATION     21
NURSING HOME LICENSE                              13
Name: sub_chp_name, dtype: int64

In [11]:
# Number of sections per section group; dropna=False also counts NaN, if any.
df_t338c97['section'].value_counts(dropna=False)

Resident Rights                                                                         35
Licensed Bed Capacity, Relocation of Residents and License Relinquishment               24
Infection Control                                                                       14
Administration                                                                          13
Nursing Services                                                                        10
General                                                                                  9
General Design Requirements in New Construction                                          9
Resident Care Unit                                                                       9
Resident Room Equipment                                                                  9
Lighting and Electrical                                                                  8
Admission, Transfer and Discharge                                                        8

In [12]:
# Consistency test: the row count must equal the number of distinct
# 'ttl_chp_sec_desc' values, i.e. no section description appears twice.
# NOTE(review): the section cite 'ttl_chp_sec' looks like the more natural
# uniqueness key - confirm whether the description was the intended column.
assert len(df_t338c97) == df_t338c97['ttl_chp_sec_desc'].nunique()

# Export

In [13]:
# Write both lookup tables to the output folder as CSV files, without the
# index column. Insertion order of the mapping keeps the original write order.
exports = {
    'wac_codes_title338.csv': df_t338,
    'wac_codes_df_t338c97.csv': df_t338c97,
}
for csv_name, frame in exports.items():
    frame.to_csv(output_path + csv_name, index=False)