# What this script does

We create a dataset of all deficiencies committed by WA-based nursing homes from 2017.

# I. SETTINGS

In [1]:
import pandas as pd
from os import listdir
import re

# II. IMPORT DATA

## Deficiencies (CMS)

In [2]:
# Folder holding the regional Statement-of-Deficiencies Excel workbooks
source_path = '../../covid19_nursing_homes_big_data/Full-Statement-of-Deficiencies-July-2020/'

# Keep only the .xlsx workbooks. Sorting makes the row order of the combined
# frame reproducible, since listdir() returns names in arbitrary order.
file_list = sorted(file for file in listdir(source_path) if re.search(r'\.xlsx$', file))

# Read every workbook first and concatenate once at the end; calling
# pd.concat inside the loop re-copies all previously loaded rows each time.
regional_frames = []
for file in file_list:
    print(file)
    regional_frames.append(
        pd.read_excel(source_path + file,
                      header=0,
                      usecols=range(0,13),  # first 13 columns only
                      # read these as objects rather than letting pandas infer a numeric dtype
                      dtype={'facility_id':object, 'zip':object, 'deficiency_tag':object})
    )

# pd.concat raises on an empty list, so fall back to an empty frame
# (which is what the folder containing no workbooks should produce).
if regional_frames:
    df_sod_orig = pd.concat(regional_frames, ignore_index=True)
else:
    df_sod_orig = pd.DataFrame()
del(regional_frames)

# Save down a CSV version
df_sod_orig.to_csv('../../covid19_nursing_homes_big_data/cms_sod_txt.csv', index=False)

text2567_20200701_cms_reg6.xlsx
text2567_20200701_cms_reg7.xlsx
text2567_20200701_cms_reg10.xlsx
text2567_20200701_cms_reg5a.xlsx
text2567_20200701_cms_reg1.xlsx
text2567_20200701_cms_reg2.xlsx
text2567_20200701_cms_reg3.xlsx
text2567_20200701_cms_reg5b.xlsx
text2567_20200701_cms_reg8.xlsx
text2567_20200701_cms_reg4.xlsx
text2567_20200701_cms_reg9.xlsx


## F-Tags (CMS)

The SOD dataframe above contains the tag code for each deficiency recorded, but it doesn't contain the general group that each of those tags belongs to. That information is contained in [this list of the revised F-tags](https://www.cms.gov/Medicare/Provider-Enrollment-and-Certification/GuidanceforLawsAndRegulations/Downloads/List-of-Revised-FTags.pdf). 

A version is in this [F-Tag crosswalk Excel file](https://www.cms.gov/Medicare/Provider-Enrollment-and-Certification/GuidanceforLawsAndRegulations/Downloads/F-Tag-Crosswalk.xlsx). This is the data we are importing now and that we will add to the SOD dataset later in the script:

In [3]:
# Load columns A-H of the 'Sortable by Tags' sheet of the F-tag crosswalk,
# then replace the spreadsheet headers with short column names.
tag_columns = ['tag', 'sqc_tag?', 'tag_title', 'cfr', 'tag_group', 'phase3', 'tag_old', 'moved_text']
df_tags_orig = pd.read_excel(
    '../A_source_data/CMS/LTC FTags_Phase 2_Crosswalk.xlsx',
    sheet_name='Sortable by Tags',
    usecols='A:H',
)
df_tags_orig.columns = tag_columns

So now we have a data frame that contains all the deficiencies found in all surveys carried out, and another dataframe that contains detailed information about the tags used to classify those deficiencies. We need to join both dataframes.

In [4]:
# List the column names of the combined deficiencies frame
df_sod_orig.columns

Index(['facility_name', 'facility_id', 'address', 'city', 'state', 'zip',
       'inspection_date', 'deficiency_tag', 'scope_severity', 'complaint',
       'standard', 'eventid', 'inspection_text'],
      dtype='object')

## Severity code descriptions

The SOD dataframe also contains codes for the severity of each deficiency, but not a description of the severity level of each of those codes. Those descriptions can be found in the document [Design for Nursing Home Compare
Five-Star Quality Rating System:
Technical Users’ Guide](https://www.cms.gov/Medicare/Provider-Enrollment-and-Certification/CertificationandComplianc/downloads/usersguide.pdf). The following mapping is based on that document:

In [5]:
# Scope/severity letter codes (A-L) paired with their descriptions.
# Kept under its own name so `severity` only ever refers to the DataFrame.
severity_rows = [['A', 'No actual harm with potential for minimal harm - Isolated'],
                 ['B', 'No actual harm with potential for minimal harm - Pattern'],
                 ['C', 'No actual harm with potential for minimal harm - Widespread'],
                 ['D', 'No actual harm with potential for more than minimal harm that is not immediate jeopardy - Isolated'],
                 ['E', 'No actual harm with potential for more than minimal harm that is not immediate jeopardy - Pattern'],
                 ['F', 'No actual harm with potential for more than minimal harm that is not immediate jeopardy - Widespread'],
                 ['G', 'Actual harm that is not immediate jeopardy - Isolated'],
                 ['H', 'Actual harm that is not immediate jeopardy - Pattern'],
                 ['I', 'Actual harm that is not immediate jeopardy - Widespread'],
                 ['J', 'Immediate jeopardy to resident health or safety - Isolated'],
                 ['K', 'Immediate jeopardy to resident health or safety - Pattern'],
                 ['L', 'Immediate jeopardy to resident health or safety - Widespread']]

# Lookup table: one row per severity code
severity = pd.DataFrame(severity_rows, columns=['scope_severity', 'severity_desc'])

# Consistency test: every severity code found in the SOD data must have a
# description; report the offending codes if that is not the case.
unknown_codes = set(df_sod_orig['scope_severity']) - set(severity['scope_severity'])
assert not unknown_codes, 'Severity codes without a description: {}'.format(unknown_codes)

# III. REDUCING: WA STATE

In [6]:
# Build the WA-only deficiencies frame from the national one.
print(df_sod_orig.shape)

# Reduce to only WA homes. Filtering first and copying only the result avoids
# duplicating the whole national frame in memory.
df_sod_wa = df_sod_orig[df_sod_orig['state'] == 'WA'].copy()
print(df_sod_wa.shape)

df_sod_wa = (
    df_sod_wa
    # Add the severity descriptions
    .join(severity.set_index('scope_severity'), on='scope_severity', how='left')
    # Create a proper date column
    .assign(inspection_dt=lambda d: pd.to_datetime(d['inspection_date']))
    # Eliminate unnecessary fields; rows that differed only in those fields
    # become identical, so de-duplicate and reset the index afterwards
    .drop(['address', 'city', 'state', 'zip', 'inspection_date', 'inspection_text'], axis=1)
    .drop_duplicates()
    .reset_index(drop=True)
    # Change some column names into something easier to use
    .rename(columns={'deficiency_tag':'tag',
                     'scope_severity':'severity_code'})
)

print(df_sod_wa.shape)

(368267, 13)
(11221, 13)
(11221, 9)


In [7]:
# Show the WA-only deficiencies frame
df_sod_wa

Unnamed: 0,facility_name,facility_id,tag,severity_code,complaint,standard,eventid,severity_desc,inspection_dt
0,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0661,D,1,0,WMDP11,No actual harm with potential for more than mi...,2019-02-27
1,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0583,D,0,1,1RUX11,No actual harm with potential for more than mi...,2020-02-27
2,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0695,D,0,1,1RUX11,No actual harm with potential for more than mi...,2020-02-27
3,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0732,C,0,1,1RUX11,No actual harm with potential for minimal harm...,2020-02-27
4,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0880,D,0,1,1RUX11,No actual harm with potential for more than mi...,2020-02-27
...,...,...,...,...,...,...,...,...,...
11216,LAKELAND VILLAGE NURSING FACILITY,50A263,0689,D,1,0,DW1U11,No actual harm with potential for more than mi...,2018-06-01
11217,LAKELAND VILLAGE NURSING FACILITY,50A263,0693,G,1,0,HI8S11,Actual harm that is not immediate jeopardy - I...,2019-07-17
11218,LAKELAND VILLAGE NURSING FACILITY,50A263,0759,D,1,0,HI8S11,No actual harm with potential for more than mi...,2019-07-17
11219,LAKELAND VILLAGE NURSING FACILITY,50A263,0881,J,1,0,NEL111,Immediate jeopardy to resident health or safet...,2019-08-09


#### What does each row of this dataframe represent?

Question: Is each of the 11221 rows a unique combination of facility/survey/deficiency?

In [8]:
# Each (facility, event, tag) combination must appear in at most one row
key_columns = ['facility_id', 'eventid', 'tag']
assert not df_sod_wa.duplicated(subset=key_columns).any()

That is indeed the case. So each row seems to represent:
- a single deficiency
- found at a particular facility
- during a particular event (i.e., inspection or investigation)

In [9]:
# Number of distinct values in each column
df_sod_wa.nunique()

facility_name     203
facility_id       203
tag               283
severity_code      11
complaint           2
standard            2
eventid          1870
severity_desc      11
inspection_dt     731
dtype: int64

#### What is the time period covered by the dataset?

In [10]:
# Earliest and latest inspection dates in the WA data
inspection_dates = df_sod_wa['inspection_dt']
print(inspection_dates.min())
print(inspection_dates.max())

2016-06-11 00:00:00
2020-03-03 00:00:00


# IV. JOINING BOTH DATAFRAMES

In [11]:
# Work on a copy so the crosswalk as imported (df_tags_orig) stays untouched
df_tags = df_tags_orig.copy()

In [12]:
# Count the distinct tag codes on each side of the upcoming join
n_sod_tags = df_sod_wa['tag'].nunique()
n_new_tags = df_tags['tag'].nunique()
n_old_tags = df_tags['tag_old'].nunique()

# Deficiencies data
print('There are', n_sod_tags, 'different tags in df_sod_wa\n')

# Crosswalk: new and old tag columns
print('There are', n_new_tags, 'different NEW tags in df_tags')
print('There are', n_old_tags, 'different OLD tags in df_tags')

There are 283 different tags in df_sod_wa

There are 205 different NEW tags in df_tags
There are 176 different OLD tags in df_tags


## Building a mapping table via tag numbers

First of all, based on a comparison of the two documents mentioned earlier ([the list of the revised F-tags](https://www.cms.gov/Medicare/Provider-Enrollment-and-Certification/GuidanceforLawsAndRegulations/Downloads/List-of-Revised-FTags.pdf) and the [F-Tag crosswalk spreadsheet](https://www.cms.gov/Medicare/Provider-Enrollment-and-Certification/GuidanceforLawsAndRegulations/Downloads/F-Tag-Crosswalk.xlsx)) there are a few adjustments that need to be made.

In [13]:
# Crosswalk rows that list two tag groups (separated by a newline) are
# collapsed to a single group.
# Each pair is (label as it appears in the sheet, label to keep).
tag_group_fixes = [
    ('483.20  Resident Assessments\n483.70  Administration',
     '483.70  Administration'),
    ('483.10 Resident Rights\n483.12  Freedom from Abuse, Neglect, and Exploitation',
     '483.12  Freedom from Abuse, Neglect, and Exploitation'),
    ('483.10 Resident Rights\n483.90  Physical Environment',
     '483.90  Physical Environment'),
]

for combined_label, single_label in tag_group_fixes:
    # regex=False: these are literal labels; it keeps the '.' from acting as a
    # wildcard and makes the result independent of the pandas default for
    # `regex`, which differs between pandas versions.
    df_tags['tag_group'] = df_tags['tag_group'].str.replace(combined_label, single_label, regex=False)

Now we split the tag group numbers and names.

In [14]:
# Split 'tag_group' (e.g. '483.10 Resident Rights') into its number and name.
# Raw strings avoid invalid-escape warnings for '\d'.
# expand=False returns a Series rather than a one-column DataFrame.
df_tags['tag_group_num'] = df_tags['tag_group'].str.extract(r'(\d+\.\d+)', expand=False)

# regex=True must be explicit: where str.replace defaults to literal matching
# (pandas 2.x) the pattern would otherwise match nothing and the number would
# stay in the name.
df_tags['tag_group_name'] = df_tags['tag_group'].str.replace(r'\d+\.\d+', '', regex=True).str.strip()

In [15]:
# Show the crosswalk with the new tag_group_num / tag_group_name columns
df_tags

Unnamed: 0,tag,sqc_tag?,tag_title,cfr,tag_group,phase3,tag_old,moved_text,tag_group_num,tag_group_name
0,F540,,Definitions,483.5,,,F150,483.5,,
1,F550,X,Resident Rights/Exercise of Rights,483.10(a)(1)(2)(b)(1)(2),483.10 Resident Rights,,F151,483.10(b)(1)(2),483.10,Resident Rights
2,F551,,Rights Exercised by Representative,483.10(b)(3)-(7)(i)-(iii),483.10 Resident Rights,,F152,483.10(b)(3)-(7),483.10,Resident Rights
3,F573,,Right to Access/Purchase Copies of Records,483.10(g)(2)(i)(ii)(3),483.10 Resident Rights,,F153,483.10(g)(2)(3),483.10,Resident Rights
4,F552,,Right to be Informed/Make Treatment Decisions,483.10(c)(1)(4)(5),483.10 Resident Rights,,F154,483.10(c)(1)(4)(5),483.10,Resident Rights
...,...,...,...,...,...,...,...,...,...,...
244,F942,,Resident’s Rights Training,483.95(b),483.95 Training Requirements,Entire tag - Phase 3\nWill not be in ASPEN unt...,,No Associated Tag,483.95,Training Requirements
245,F944,,QAPI Training,483.95(d),483.95 Training Requirements,Entire tag - Phase 3\nWill not be in ASPEN unt...,,No Associated Tag,483.95,Training Requirements
246,F945,,Infection Control Training,483.95(e),483.95 Training Requirements,Entire tag - Phase 3\nWill not be in ASPEN unt...,,No Associated Tag,483.95,Training Requirements
247,F946,,Compliance and Ethics Training,483.95(f)(1)(2),483.95 Training Requirements,Entire tag - Phase 3\nWill not be in ASPEN unt...,,No Associated Tag,483.95,Training Requirements


In [16]:
# New tags
df_tags_new = pd.DataFrame(df_tags[['tag', 'tag_group_num', 'tag_group_name']])
df_tags_new['tag_old_new'] = 'New'

# Old tags
df_tags_old = pd.DataFrame(df_tags[['tag_old', 'tag_group_num', 'tag_group_name']])
df_tags_old = df_tags_old.rename(columns={'tag_old':'tag'})
df_tags_old['tag_old_new'] = 'Old'

# Old and new together
df_tag_map = pd.concat([df_tags_new, df_tags_old], axis=0)

# Reduce and tidy up
df_tag_map = df_tag_map.dropna(axis=0, how='any')
df_tag_map = df_tag_map.drop_duplicates()
df_tag_map = df_tag_map.sort_values(['tag', 'tag_group_num'], ascending=True)
df_tag_map = df_tag_map.reset_index(drop=True)

In [17]:
# Show the combined old/new tag mapping table
df_tag_map

Unnamed: 0,tag,tag_group_num,tag_group_name,tag_old_new
0,F151,483.10,Resident Rights,Old
1,F152,483.10,Resident Rights,Old
2,F153,483.10,Resident Rights,Old
3,F154,483.10,Resident Rights,Old
4,F155,483.10,Resident Rights,Old
...,...,...,...,...
385,F945,483.95,Training Requirements,New
386,F946,483.95,Training Requirements,New
387,F947,483.95,Training Requirements,New
388,F948,483.95,Training Requirements,New


In [18]:
# Number of mapping rows per tag group name
df_tag_map['tag_group_name'].value_counts()

Resident Rights                                         77
Physical Environment                                    37
Administration                                          33
Quality of Care                                         29
Food and Nutrition Services                             28
Resident Assessments                                    22
Laboratory, Radiology, and Other Diagnostic Services    22
Nursing Services                                        17
Freedom from Abuse, Neglect, and Exploitation           16
Admission, Transfer, and Discharge                      15
Comprehensive Resident Centered Care Plans              13
Pharmacy Services                                       13
Training Requirements                                   13
Quality of Life                                         13
Physician Services                                      12
Behavioral Health Services                              10
Infection Control                                       

#### Check and adjust for double classifications

Back in 2018, some tags were renamed and/or reclassified. The mapping table below addresses that reclassification, except for one situation: when a tag is not fully "transported" into another section, but is broken down into components and those components are then moved around. 

For example, the old tag F309 belonged to the *Quality of Care* regulatory grouping. When F309 was reviewed, it got broken down into components and one of them was reclassified into the *Behavioral Health Services* group. So now when we do the mapping from old to new tags, each instance of F309 produces two records that are copies of each other, except for the grouping. In other words, we are double counting.

Here we adjust to correct that.

In [19]:
# If a tag show up more than once in the mapping table, 
# it means it was wrongly assigned more than tag group. Let's find those.
double_count = df_tag_map['tag'].value_counts()
double_count = double_count[double_count > 1].reset_index()
double_count = df_tag_map[df_tag_map['tag'].isin(double_count['index'])]

double_count

Unnamed: 0,tag,tag_group_num,tag_group_name,tag_old_new
4,F155,483.1,Resident Rights,Old
5,F155,483.24,Quality of Life,Old
13,F164,483.1,Resident Rights,Old
14,F164,483.7,Administration,Old
41,F226,483.12,"Freedom from Abuse, Neglect, and Exploitation",Old
42,F226,483.95,Training Requirements,Old
68,F279,483.2,Resident Assessments,Old
69,F279,483.21,Comprehensive Resident Centered Care Plans,Old
70,F280,483.1,Resident Rights,Old
71,F280,483.21,Comprehensive Resident Centered Care Plans,Old


We found [an old CMS document](https://www.cms.gov/Regulations-and-Guidance/Guidance/Transmittals/downloads/R5SOM.pdf) that sheds light on the previous grouping of the old codes. The following adjustments are made based on that document:

In [20]:
# Manual overrides: (old tag, tag group number, tag group name).
# Every mapping row for the tag is set to the single group listed here.
corrected_groups = [
    ('F155', '483.10', 'Resident Rights'),
    ('F164', '483.10', 'Resident Rights'),
    ('F280', '483.10', 'Resident Rights'),
    ('F226', '483.12', 'Freedom from Abuse, Neglect, and Exploitation'),
    ('F279', '483.21', 'Comprehensive Resident Centered Care Plans'),
    ('F309', '483.25', 'Quality of Care'),
    ('F461', '483.90', 'Physical Environment'),
    ('F498', '483.35', 'Nursing Services'),
]

for old_tag, group_num, group_name in corrected_groups:
    is_tag = df_tag_map['tag'] == old_tag
    df_tag_map.loc[is_tag, ['tag_group_num', 'tag_group_name']] = [group_num, group_name]

# Rows of the same tag are now identical, so collapse them
df_tag_map = df_tag_map.drop_duplicates().reset_index(drop=True)

In [21]:
# Show the mapping table after the manual regrouping
df_tag_map

Unnamed: 0,tag,tag_group_num,tag_group_name,tag_old_new
0,F151,483.10,Resident Rights,Old
1,F152,483.10,Resident Rights,Old
2,F153,483.10,Resident Rights,Old
3,F154,483.10,Resident Rights,Old
4,F155,483.10,Resident Rights,Old
...,...,...,...,...
375,F945,483.95,Training Requirements,New
376,F946,483.95,Training Requirements,New
377,F947,483.95,Training Requirements,New
378,F948,483.95,Training Requirements,New


In [22]:
# After our reclassification, let's see if there remain any tags that are assigned to more than group.
double_count = df_tag_map['tag'].value_counts()
double_count = double_count[double_count > 1].reset_index()
double_count = df_tag_map[df_tag_map['tag'].isin(double_count['index'])]
double_count = double_count.drop_duplicates()

double_count

Unnamed: 0,tag,tag_group_num,tag_group_name,tag_old_new
110,F373,483.6,Food and Nutrition Services,Old
111,F373,483.95,Training Requirements,Old


Just one. We checked, and it is not present in the ***df_sod_wa***, so we let it be.

Now we standardize some of the names of the tag_group_name, to make them consistent with the way they are named in the WA state regulation

In [23]:
# Rename tag groups: (pattern, replacement, pattern is a regular expression?).
# The regex flag is passed explicitly because the pandas default for `regex`
# differs between versions; with literal matching the two '.*' patterns would
# silently match nothing.
group_name_fixes = [
    ('Resident Rights.*', 'Resident Rights', True),
    ('Admission, Transfer, and Discharge', 'Admission, Transfer and Discharge', False),
    ('Resident Assessments.*', 'Resident Assessment and Plan of Care', True),
    ('Specialized Rehabilitative Services', 'Specialized Habilitative and Rehabilitative Services', False),
    ('Food and Nutrition Services', 'Food Services Areas', False),
]

for pattern, new_name, is_regex in group_name_fixes:
    df_tag_map['tag_group_name'] = df_tag_map['tag_group_name'].str.replace(pattern, new_name, regex=is_regex)

# Reduce and tidy up
df_tag_map = df_tag_map.drop_duplicates().reset_index(drop=True)

In [24]:
# Show the mapping table after standardizing the group names
df_tag_map

Unnamed: 0,tag,tag_group_num,tag_group_name,tag_old_new
0,F151,483.10,Resident Rights,Old
1,F152,483.10,Resident Rights,Old
2,F153,483.10,Resident Rights,Old
3,F154,483.10,Resident Rights,Old
4,F155,483.10,Resident Rights,Old
...,...,...,...,...
375,F945,483.95,Training Requirements,New
376,F946,483.95,Training Requirements,New
377,F947,483.95,Training Requirements,New
378,F948,483.95,Training Requirements,New


In [25]:
# Number of mapping rows per tag group name, counting missing names too
df_tag_map['tag_group_name'].value_counts(dropna=False)

Resident Rights                                         76
Physical Environment                                    37
Administration                                          32
Food Services Areas                                     28
Quality of Care                                         28
Laboratory, Radiology, and Other Diagnostic Services    22
Resident Assessment and Plan of Care                    21
Nursing Services                                        17
Freedom from Abuse, Neglect, and Exploitation           16
Admission, Transfer and Discharge                       15
Pharmacy Services                                       13
Comprehensive Resident Centered Care Plans              12
Physician Services                                      12
Quality of Life                                         11
Training Requirements                                   11
Behavioral Health Services                               9
Infection Control                                       

## Joining

We now have a mapping table ready.
Now let's make sure that the tag number columns in each of the two datasets we will join have consistent values.

In [26]:
# Print the distinct tag codes on each side so their formats can be compared
sod_tags = df_sod_wa['tag']
map_tags = df_tag_map['tag']

print('Unique tags in df_sod_wa =', sod_tags.nunique())
print(sod_tags.unique())
print('\r')

print('Unique tags in df_tag_map =', map_tags.nunique())
print(map_tags.unique())

Unique tags in df_sod_wa = 283
['0661' '0583' '0695' '0732' '0880' '0912' '0514' '0253' '0279' '0328'
 '0329' '0425' '0441' '0458' '0678' '0552' '0641' '0657' '0658' '0684'
 '0755' '0761' '0804' '0812' '0842' '0881' '0248' '0600' '0697' '0225'
 '0247' '0640' '0689' '0757' '0758' '0791' '0610' '0760' '0323' '0159'
 '0160' '0282' '0285' '0309' '0313' '0334' '0371' '0428' '0431' '0604'
 '0637' '0645' '0656' '0686' '0730' '0226' '0550' '0570' '0582' '0692'
 '0712' '0725' '0580' '0621' '0557' '0609' '0585' '0694' '0660' '0655'
 '0744' '0584' '0622' '0623' '0625' '0685' '0688' '0727' '0803' '0921'
 '0553' '0561' '0578' '0740' '0745' '0756' '0883' '0170' '0241' '0280'
 '0311' '0406' '0164' '0462' '0644' '0677' '0690' '0693' '0849' '0558'
 '0565' '0577' '0607' '0676' '0679' '0838' '0908' '0925' '0947' '0837'
 '0156' '0167' '0242' '0250' '0281' '0312' '0317' '0318' '0353' '0386'
 '0520' '0839' '0157' '0636' '0698' '0800' '0810' '0252' '0166' '0333'
 '0465' '0626' '0310' '0325' '0555' '0919' '03

In *df_sod_wa*, instead of an 'F', the tags start with a '0'. Here we make them compatible.

In [27]:
# Make the mapping tags look like the ones in df_sod_wa: every 'F' becomes
# '0' and embedded newlines are removed.
# regex=False because both patterns are literals; it keeps the result
# independent of the pandas default for `regex` and silences the
# single-character-pattern FutureWarning of pandas 1.x.
df_tag_map['tag'] = (
    df_tag_map['tag']
    .str.replace('F', '0', regex=False)
    .str.replace('\n', '', regex=False)
)

# Number of tag codes present in both tables
print(len(set(df_sod_wa['tag']) & set(df_tag_map['tag'])))

280


Before

In [28]:
# Show the deficiencies frame before the tag-group columns are added
df_sod_wa

Unnamed: 0,facility_name,facility_id,tag,severity_code,complaint,standard,eventid,severity_desc,inspection_dt
0,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0661,D,1,0,WMDP11,No actual harm with potential for more than mi...,2019-02-27
1,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0583,D,0,1,1RUX11,No actual harm with potential for more than mi...,2020-02-27
2,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0695,D,0,1,1RUX11,No actual harm with potential for more than mi...,2020-02-27
3,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0732,C,0,1,1RUX11,No actual harm with potential for minimal harm...,2020-02-27
4,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0880,D,0,1,1RUX11,No actual harm with potential for more than mi...,2020-02-27
...,...,...,...,...,...,...,...,...,...
11216,LAKELAND VILLAGE NURSING FACILITY,50A263,0689,D,1,0,DW1U11,No actual harm with potential for more than mi...,2018-06-01
11217,LAKELAND VILLAGE NURSING FACILITY,50A263,0693,G,1,0,HI8S11,Actual harm that is not immediate jeopardy - I...,2019-07-17
11218,LAKELAND VILLAGE NURSING FACILITY,50A263,0759,D,1,0,HI8S11,No actual harm with potential for more than mi...,2019-07-17
11219,LAKELAND VILLAGE NURSING FACILITY,50A263,0881,J,1,0,NEL111,Immediate jeopardy to resident health or safet...,2019-08-09


In [29]:
# Attach the tag group columns to every deficiency (left join keeps
# deficiencies whose tag has no match in the mapping table).
df_sod_joined = df_sod_wa.join(df_tag_map.set_index('tag'), on='tag', how='left')

# A tag mapped to more than one group would silently duplicate deficiency
# rows in a left join, so check that the row count is unchanged before
# replacing df_sod_wa.
assert len(df_sod_joined) == len(df_sod_wa), \
    'Join changed the row count: some tag in df_sod_wa maps to more than one tag group'

df_sod_wa = df_sod_joined
del(df_sod_joined)

After

In [30]:
# Show the deficiencies frame with the tag-group columns added
df_sod_wa

Unnamed: 0,facility_name,facility_id,tag,severity_code,complaint,standard,eventid,severity_desc,inspection_dt,tag_group_num,tag_group_name,tag_old_new
0,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0661,D,1,0,WMDP11,No actual harm with potential for more than mi...,2019-02-27,483.21,Comprehensive Resident Centered Care Plans,New
1,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0583,D,0,1,1RUX11,No actual harm with potential for more than mi...,2020-02-27,483.10,Resident Rights,New
2,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0695,D,0,1,1RUX11,No actual harm with potential for more than mi...,2020-02-27,483.25,Quality of Care,New
3,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0732,C,0,1,1RUX11,No actual harm with potential for minimal harm...,2020-02-27,483.35,Nursing Services,New
4,ISSAQUAH NURSING AND REHABILITATION CENTER,505004,0880,D,0,1,1RUX11,No actual harm with potential for more than mi...,2020-02-27,483.80,Infection Control,New
...,...,...,...,...,...,...,...,...,...,...,...,...
11216,LAKELAND VILLAGE NURSING FACILITY,50A263,0689,D,1,0,DW1U11,No actual harm with potential for more than mi...,2018-06-01,483.25,Quality of Care,New
11217,LAKELAND VILLAGE NURSING FACILITY,50A263,0693,G,1,0,HI8S11,Actual harm that is not immediate jeopardy - I...,2019-07-17,483.25,Quality of Care,New
11218,LAKELAND VILLAGE NURSING FACILITY,50A263,0759,D,1,0,HI8S11,No actual harm with potential for more than mi...,2019-07-17,483.45,Pharmacy Services,New
11219,LAKELAND VILLAGE NURSING FACILITY,50A263,0881,J,1,0,NEL111,Immediate jeopardy to resident health or safet...,2019-08-09,483.80,Infection Control,New


Reorder columns

In [31]:
# Final column layout: facility, inspection event, tag and its group,
# severity, then the complaint/standard columns
column_order = [
    'facility_name', 'facility_id',
    'eventid', 'inspection_dt',
    'tag', 'tag_group_num', 'tag_group_name', 'tag_old_new',
    'severity_code', 'severity_desc',
    'complaint', 'standard',
]
df_sod_wa = df_sod_wa[column_order]

# V. EXPORTING RESULTS

In [32]:
# Write the final WA deficiencies table to CSV, without the index column
df_sod_wa.to_csv('../C_output_data/sod_wa.csv', index=False)