# Settings

In [1]:
import pandas as pd

# Original data

Citation frequency report for adult family homes.

Obtained from [Chris Wright](mailto:wrighcd2@dshs.wa.gov) on Friday, April 24, 2020 at 1:42 PM.

In [2]:
# Folder that holds the FAC 1008 report workbook.
source_path = '/Volumes/files/COVID19/DSHS_Facility_Reports_Manuel/FAC_1008_Report/'

# Load the citation-frequency worksheet untouched. The column names are taken
# from zero-based row 11, only the first 14 columns are read, and 'Region' and
# 'LIC Nmbr' are parsed as text instead of numbers.
df_orig = pd.read_excel(
    source_path + 'Copy of 2020 - 046 - Fac1008_CitationFrequencyAfhBh.xlsx',
    sheet_name='Fac1008_CitationFrequencyAfhBh',
    header=11,
    usecols=range(14),
    dtype={'Region': str, 'LIC Nmbr': str},
)

In [3]:
# Print (rows, columns) of the raw frame, then display the dtype of each column.
print((len(df_orig.index), len(df_orig.columns)))
df_orig.dtypes

(12129, 14)


Region                                  object
Region-Unit                             object
Facility                                object
LIC Nmbr                                object
Visit Display Id                        object
Visit Start Date                datetime64[ns]
Last Date of Data Collection    datetime64[ns]
Date SOD Sent Out               datetime64[ns]
Facility Received Date          datetime64[ns]
Complaint number(s)                     object
Chapter Description                     object
Chapter Section                         object
WRD WACRCWCode                          object
Deficiency Type                         object
dtype: object

In [4]:
# Peek at the first five rows of the raw report.
df_orig.head(n=5)

Unnamed: 0,Region,Region-Unit,Facility,LIC Nmbr,Visit Display Id,Visit Start Date,Last Date of Data Collection,Date SOD Sent Out,Facility Received Date,Complaint number(s),Chapter Description,Chapter Section,WRD WACRCWCode,Deficiency Type
0,1.0,1B,ANGELS LANDING NORTH,751471,024-FL-20190307,2019-03-06,2019-03-18,2019-03-19,2019-03-20,,Adult family home,WAC 388-76-10198,WAC 388-76-10198-4,Citation-POC
1,,,ANGELS LANDING NORTH,751471,024-FL-20190307,2019-03-06,2019-03-18,2019-03-19,2019-03-20,,Tuberculosis,WAC 388-76-10285,WAC 388-76-10285-1,Consultation
2,,,ANGELS LANDING NORTH,751471,024-FL-20190307,2019-03-06,2019-03-18,2019-03-19,2019-03-20,,Tuberculosis,WAC 388-76-10285,WAC 388-76-10285-2,Consultation
3,,,ANGELS LANDING NORTH,751471,024-FL-20190307,2019-03-06,2019-03-18,2019-03-19,2019-03-20,,Resident rights,WAC 388-76-10522,WAC 388-76-10522-3,Consultation
4,,,ANGELS LANDING NORTH,751471,024-FL-20190307,2019-03-06,2019-03-18,2019-03-19,2019-03-20,,Safety and maintenance,WAC 388-76-10750,WAC 388-76-10750-5-c,Citation-POC


In [5]:
# Peek at the last five rows of the raw report.
df_orig.tail(n=5)

Unnamed: 0,Region,Region-Unit,Facility,LIC Nmbr,Visit Display Id,Visit Start Date,Last Date of Data Collection,Date SOD Sent Out,Facility Received Date,Complaint number(s),Chapter Description,Chapter Section,WRD WACRCWCode,Deficiency Type
12124,,,POLLYS COUNTRY AFH #1 INC,93301.0,035-FL-20190930,2019-09-27,2019-09-27,2019-10-18,2019-10-21,,Medication disposal,WAC 388-76-10490,WAC 388-76-10490-1,Consultation
12125,,,POLLYS COUNTRY AFH #1 INC,93301.0,035-FL-20190930,2019-09-27,2019-09-27,2019-10-18,2019-10-21,,Resident rights,WAC 388-76-10540,WAC 388-76-10540,Consultation
12126,,,POLLYS COUNTRY AFH #1 INC,93301.0,035-FL-20190930,2019-09-27,2019-09-27,2019-10-18,2019-10-21,,Resident rights,WAC 388-76-10540,WAC 388-76-10540-4,Consultation
12127,,Region Total:,,,,NaT,NaT,NaT,NaT,,,,Count of Citations:,3937
12128,State Total:,,,,,NaT,NaT,NaT,NaT,,,,Count of Citations:,12125


# Working data frame

In [6]:
# Work on an independent (deep) copy so the raw frame stays as loaded.
df = df_orig.copy(deep=True)

In [7]:
# Print (rows, columns) of the working copy, then display its last five rows.
print((len(df.index), len(df.columns)))
df.tail(n=5)

(12129, 14)


Unnamed: 0,Region,Region-Unit,Facility,LIC Nmbr,Visit Display Id,Visit Start Date,Last Date of Data Collection,Date SOD Sent Out,Facility Received Date,Complaint number(s),Chapter Description,Chapter Section,WRD WACRCWCode,Deficiency Type
12124,,,POLLYS COUNTRY AFH #1 INC,93301.0,035-FL-20190930,2019-09-27,2019-09-27,2019-10-18,2019-10-21,,Medication disposal,WAC 388-76-10490,WAC 388-76-10490-1,Consultation
12125,,,POLLYS COUNTRY AFH #1 INC,93301.0,035-FL-20190930,2019-09-27,2019-09-27,2019-10-18,2019-10-21,,Resident rights,WAC 388-76-10540,WAC 388-76-10540,Consultation
12126,,,POLLYS COUNTRY AFH #1 INC,93301.0,035-FL-20190930,2019-09-27,2019-09-27,2019-10-18,2019-10-21,,Resident rights,WAC 388-76-10540,WAC 388-76-10540-4,Consultation
12127,,Region Total:,,,,NaT,NaT,NaT,NaT,,,,Count of Citations:,3937
12128,State Total:,,,,,NaT,NaT,NaT,NaT,,,,Count of Citations:,12125


In [8]:
# Distinct labels in the two grouping columns, one array per line.
print(df['Region'].unique(), df['Region-Unit'].unique(), sep='\n')

['1   ' nan '2   ' '3   ' 'State Total:']
['1B' nan '1C' '1E' 'Region Total:' '2B' '2E' '2G' '2I' '3A' '3D' '3E']


In [9]:
# Eliminate rows that display subtotals ('State Total:' in 'Region',
# 'Region Total:' in 'Region-Unit'). Rows whose label is NaN compare as
# "not equal" and are therefore kept.
df = df[
    df['Region'].ne('State Total:') & df['Region-Unit'].ne('Region Total:')
].reset_index(drop=True)

# 'Region' and 'Region-Unit' are only filled in on the first row of each group,
# so carry the last seen label down to the rows below it.
# Series.ffill() is used because fillna(method='ffill') is deprecated in pandas 2.x.
# NOTE(review): 'Region' labels keep their trailing spaces (e.g. '1   ');
# consider .str.strip() if they are ever compared against plain '1'.
df['Region'] = df['Region'].ffill()
df['Region-Unit'] = df['Region-Unit'].ffill()

# After the forward fill every remaining row must carry both labels.
assert df[['Region', 'Region-Unit']].notna().all().all(), \
    "Missing 'Region'/'Region-Unit' labels remain after forward fill"

In [10]:
# First five rows of the cleaned working frame.
df.head(n=5)

Unnamed: 0,Region,Region-Unit,Facility,LIC Nmbr,Visit Display Id,Visit Start Date,Last Date of Data Collection,Date SOD Sent Out,Facility Received Date,Complaint number(s),Chapter Description,Chapter Section,WRD WACRCWCode,Deficiency Type
0,1,1B,ANGELS LANDING NORTH,751471,024-FL-20190307,2019-03-06,2019-03-18,2019-03-19,2019-03-20,,Adult family home,WAC 388-76-10198,WAC 388-76-10198-4,Citation-POC
1,1,1B,ANGELS LANDING NORTH,751471,024-FL-20190307,2019-03-06,2019-03-18,2019-03-19,2019-03-20,,Tuberculosis,WAC 388-76-10285,WAC 388-76-10285-1,Consultation
2,1,1B,ANGELS LANDING NORTH,751471,024-FL-20190307,2019-03-06,2019-03-18,2019-03-19,2019-03-20,,Tuberculosis,WAC 388-76-10285,WAC 388-76-10285-2,Consultation
3,1,1B,ANGELS LANDING NORTH,751471,024-FL-20190307,2019-03-06,2019-03-18,2019-03-19,2019-03-20,,Resident rights,WAC 388-76-10522,WAC 388-76-10522-3,Consultation
4,1,1B,ANGELS LANDING NORTH,751471,024-FL-20190307,2019-03-06,2019-03-18,2019-03-19,2019-03-20,,Safety and maintenance,WAC 388-76-10750,WAC 388-76-10750-5-c,Citation-POC


In [11]:
# Last five rows of the cleaned working frame.
df.tail(n=5)

Unnamed: 0,Region,Region-Unit,Facility,LIC Nmbr,Visit Display Id,Visit Start Date,Last Date of Data Collection,Date SOD Sent Out,Facility Received Date,Complaint number(s),Chapter Description,Chapter Section,WRD WACRCWCode,Deficiency Type
12120,3,3E,Queen Esther Adult Family Home,754261,002-CI-20191231,2019-12-30,2020-01-03,2020-01-08,2020-01-13,3685592.0,Resident rights,WAC 388-76-10522,WAC 388-76-10522-6,Consultation
12121,3,3E,POLLYS COUNTRY AFH #1 INC,93301,035-FL-20190930,2019-09-27,2019-09-27,2019-10-18,2019-10-21,,Medication disposal,WAC 388-76-10490,WAC 388-76-10490,Consultation
12122,3,3E,POLLYS COUNTRY AFH #1 INC,93301,035-FL-20190930,2019-09-27,2019-09-27,2019-10-18,2019-10-21,,Medication disposal,WAC 388-76-10490,WAC 388-76-10490-1,Consultation
12123,3,3E,POLLYS COUNTRY AFH #1 INC,93301,035-FL-20190930,2019-09-27,2019-09-27,2019-10-18,2019-10-21,,Resident rights,WAC 388-76-10540,WAC 388-76-10540,Consultation
12124,3,3E,POLLYS COUNTRY AFH #1 INC,93301,035-FL-20190930,2019-09-27,2019-09-27,2019-10-18,2019-10-21,,Resident rights,WAC 388-76-10540,WAC 388-76-10540-4,Consultation


In [14]:
# Earliest and latest 'Visit Start Date' in the raw report, one per line.
print(df_orig['Visit Start Date'].min(), df_orig['Visit Start Date'].max(), sep='\n')

2019-01-02 00:00:00
2019-12-31 00:00:00


In [15]:
# Earliest and latest 'Last Date of Data Collection' in the raw report, one per line.
print(
    df_orig['Last Date of Data Collection'].min(),
    df_orig['Last Date of Data Collection'].max(),
    sep='\n',
)

2019-01-03 00:00:00
2020-12-30 00:00:00


In [16]:
# Earliest and latest 'Date SOD Sent Out' in the raw report, one per line.
print(df_orig['Date SOD Sent Out'].min(), df_orig['Date SOD Sent Out'].max(), sep='\n')

2019-01-07 00:00:00
2020-03-03 00:00:00


In [17]:
# Earliest and latest 'Facility Received Date' in the raw report, one per line.
print(
    df_orig['Facility Received Date'].min(),
    df_orig['Facility Received Date'].max(),
    sep='\n',
)

2018-01-14 00:00:00
2090-08-22 00:00:00


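TODO: *Facility Received Date* seems to have dodgy values: the minimum (2018-01-14) predates the earliest visit (2019-01-02), and the maximum (2090-08-22) lies decades in the future. *Last Date of Data Collection* also peaks at 2020-12-30, which is later than the date this report was obtained (2020-04-24). Check.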

# Analysis

In [28]:
# Headline figures: one row per deficiency, one distinct 'LIC Nmbr' per licence.
n_deficiencies = len(df)
n_licensed_homes = df['LIC Nmbr'].nunique()

print(
    'Between 1/Jan/2019 and 31/Dec/2019, a total of',
    n_deficiencies,
    'WAC deficiencies were registered in',
    n_licensed_homes,
    'licensed adult family organizations. This amounts to an average of',
    n_deficiencies/n_licensed_homes,
    'deficiencies per adult family home.',
)

Between 1/Jan/2019 and 31/Dec/2019, a total of 12125 WAC deficiencies were registered in 1942 licensed adult family organizations. This amounts to an average of 6.24356333676622 deficiencies per adult family home.


In [19]:
# Share of rows per deficiency type; missing values get their own bucket.
df['Deficiency Type'].value_counts(normalize=True, dropna=False)

Citation-POC       0.815423
Consultation       0.174186
Citation-No POC    0.010392
Name: Deficiency Type, dtype: float64

In [20]:
# Share of rows per chapter description (missing values included), top 30 only.
df['Chapter Description'].value_counts(normalize=True, dropna=False).head(n=30)

Resident rights                                 0.153155
Negotiated care plan                            0.100948
Medication                                      0.072742
Background checks                               0.070598
Qualifications                                  0.063753
Medication system                               0.056082
Adult family home                               0.048000
Tuberculosis                                    0.047588
Safety and maintenance                          0.042392
License annual fee                              0.036289
Resident record                                 0.036124
Medical devices                                 0.028948
Care and services                               0.023340
Medication storage                              0.018227
Reporting requirement                           0.017649
Emergency evacuation drills                     0.012454
Liability insurance required                    0.011134
Emergency drinking water supply

In [22]:
# Fraction of deficiency rows with a non-missing complaint number
# (Series.count() ignores NaN).
share_with_complaint = df['Complaint number(s)'].count()/len(df)
print('Only ', share_with_complaint, 'of the total deficiences had a complaint number.')

Only  0.2595463917525773 of the total deficiences had a complaint number.


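**QUESTION**. How should NAs be interpreted in the field 'Complaint number(s)'? Hypothesis:
- If there is a complaint number, then the violation/deficiency was found by an *inspection*.
- If there is no complaint number, then the violation/deficiency was found by an *investigation*.

Note: the sample rows displayed earlier hint at the opposite mapping (the one row with a complaint number has a `-CI-` visit ID, while the rows without one have `-FL-` visit IDs), so the direction of this hypothesis should be verified with DSHS.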

In [23]:
# Distinct complaint numbers per licence, largest count first:
# de-duplicate the (licence, complaint) pairs, then count the non-missing
# complaint numbers within each licence.
temp = (
    df[['LIC Nmbr', 'Complaint number(s)']]
    .drop_duplicates()
    .groupby('LIC Nmbr')
    .count()
    .reset_index()
    .sort_values('Complaint number(s)', ascending=False)
    .reset_index(drop=True)
)

# Consistency test: the per-licence counts add up to the overall number of
# distinct complaint numbers only if no complaint number is shared by two licences.
assert temp['Complaint number(s)'].sum() == df['Complaint number(s)'].nunique()

temp.head()

Unnamed: 0,LIC Nmbr,Complaint number(s)
0,554300,5
1,750450,4
2,753474,4
3,690500,4
4,753571,4


In [27]:
# Ten facility names with the most deficiency rows (missing names included).
# NOTE(review): this groups by name, not by 'LIC Nmbr' — confirm names are unique per licence.
df['Facility'].value_counts(dropna=False).head(n=10)

EVELYN B PIPES AFH              99
Amanda's Senior Home Care       50
MAPLE PARK ADULT FAMILY HOME    42
IMNAYS AFH                      41
OCEAN BREEZE CARE HOME          41
DALISAY CORSILLES AFH           40
Dela Cruz AFH                   40
ROSEMARIE HOLDER AFH            40
LOVE OUTREACH                   39
APPLE CREEK                     39
Name: Facility, dtype: int64