# What this script does

Process the data on COVID cases and deaths by facility.

# I. Settings

In [1]:
import pandas as pd
# Display settings: lift pandas' limits on column width and on the number of
# columns shown, so wide frames are rendered without truncation.
for display_option in ('display.max_colwidth', 'display.max_columns'):
    pd.set_option(display_option, None)

# II. Data

### Import

In [2]:
# Outbreaks workbook: read columns A through F of the named sheet, taking the
# first row as the header.
df_ob_orig = pd.read_excel(
    io='../A_source_data/DSHS/2020-9-15--Nursing Home Outbreaks By Facility.xlsx',
    sheet_name='Prioritization_Input_Full_2020_',
    usecols='A:F',
    header=0,
)

# Mapping table, later indexed and joined on its 'link' column.
df_ob_map_orig = pd.read_csv(
    filepath_or_buffer='../A_source_data/DSHS/outbreaks_mapping_table.csv',
)

### Process

Outbreaks dataset

In [3]:
# Work on copies so the frames as imported stay untouched
df_ob_map = df_ob_map_orig.copy()
df_ob = df_ob_orig.copy()

# Keep only the rows whose Facility_Type is 'NF', then discard the two columns
# that are not carried forward and renumber the rows
is_nf = df_ob['Facility_Type'] == 'NF'
df_ob = (
    df_ob.loc[is_nf]
    .drop(columns=['Facility_Type', 'License_Number'])
    .reset_index(drop=True)
)

# Positional rename: relies on exactly four columns remaining, in this order
df_ob.columns = ['facility_name', 'beds', 'cases', 'deaths']

# Derived columns: a lower-cased, stripped facility name used as the join key,
# plus cases and deaths per bed
# NOTE(review): a facility with 0 beds would yield inf/NaN rates - confirm the
# source never contains such rows
df_ob = df_ob.assign(
    link=df_ob['facility_name'].str.lower().str.strip(),
    case_rate=df_ob['cases'] / df_ob['beds'],
    death_rate=df_ob['deaths'] / df_ob['beds'],
)

# Index by the join key
df_ob = df_ob.set_index('link')

Mapping table (supplies each facility's federal number)

In [4]:
# Index the mapping table by its 'link' column so it can be joined on that key.
# Built from the untouched import (`df_ob_map_orig`) rather than from
# `df_ob_map` itself: `set_index` returns a new frame, and deriving it from the
# original keeps this cell re-runnable. Reassigning from `df_ob_map` would raise
# a KeyError on a second run, because 'link' would already be the index.
df_ob_map = df_ob_map_orig.set_index('link')

Join

In [5]:
# Attach the mapping-table columns to every outbreak row through the shared
# 'link' index (left join keeps all outbreak rows), order the rows from the
# highest case rate to the lowest, and replace the 'link' index with a fresh
# positional one.
df_ob = (
    df_ob.join(df_ob_map, how='left')
    .sort_values(by='case_rate', ascending=False)
    .reset_index(drop=True)
)

# Rearrange columns: rotate the last column (the one brought in by the join)
# to the front, leaving the others in their existing order
cols = df_ob.columns.tolist()
cols = cols[-1:] + cols[:-1]
df_ob = df_ob.loc[:, cols]
df_ob

Unnamed: 0,federal_num,facility_name,beds,cases,deaths,case_rate,death_rate
0,505098,Shuksan Rehabilitation And Health Care,52,74,15,1.423077,0.288462
1,505263,Prestige Post-Acute And Rehab Center - Kittitas,74,102,15,1.378378,0.202703
2,505463,Sunrise View Convalescent Center,59,79,15,1.338983,0.254237
3,505265,Emerald Care,82,99,5,1.207317,0.060976
4,505010,Garden Village,101,119,16,1.178218,0.158416
...,...,...,...,...,...,...,...
204,505531,Heron'S Key,30,0,0,0.000000,0.000000
205,50A263,Lakeland Village Nursing Facility,93,0,0,0.000000,0.000000
206,505306,Life Care Center Of Port Townsend,94,0,0,0.000000,0.000000
207,505525,Manor Care Health Services - Lacey,120,0,0,0.000000,0.000000


In [6]:
# Export the processed outbreaks table, leaving out the positional row index
df_ob.to_csv(path_or_buf='../C_output_data/outbreaks.csv', index=False)