# What this script does

Determine whether COVID case rates or death rates correlate with the deficiency history of each facility.

# I. SETTINGS

In [1]:
import pandas as pd
import numpy as np
# Lift pandas' display limits (None = no limit) so wide tables and long
# cell values are shown in full.
for display_option in ('display.max_colwidth', 'display.max_columns'):
    pd.set_option(display_option, None)

# II. DATA

### Import

In [2]:
# Deficiencies (from CMS database).
# Every column is read as raw text ('object'); only the inspection date
# column is parsed into datetimes.
df_sod_orig = pd.read_csv(
    '../C_output_data/sod_wa.csv',
    dtype='object',
    parse_dates=['inspection_dt'],
)

# COVID outbreaks (column dtypes are left for pandas to infer).
df_ob_orig = pd.read_csv(
    '../C_output_data/outbreaks.csv',
)

### Create working copies

In [3]:
# Working copies of both inputs (statements of deficiency and COVID
# outbreaks); later cells operate on these rather than on the *_orig frames.
df_sod, df_ob = df_sod_orig.copy(), df_ob_orig.copy()

### Deficiencies

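From the [CMS federal tag list](https://www.cms.gov/Medicare/Provider-Enrollment-and-Certification/GuidanceforLawsAndRegulations/Downloads/List-of-Revised-FTags.pdf), we identified the federal tags that are related to staffing. The cell below, however, counts the deficiencies of every tag group (not only the staffing-related ones) for each facility and joins the facility's COVID case and death rates: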

In [4]:
# Build one row per facility holding the number of deficiencies cited in each
# tag group, then attach that facility's COVID case and death rates.

# assign() returns a new frame, so the helper column is never written into a
# column subset of df_sod (a pattern that can raise SettingWithCopyWarning).
temp = (
    df_sod[['facility_id', 'tag_group_name']]
    .assign(counter=1)
    .pivot_table(index='facility_id',
                 columns='tag_group_name',
                 values='counter',
                 aggfunc='sum',
                 fill_value=0)  # facility/tag-group pairs with no citation -> 0
)

# Pick the two rate columns by name instead of relying on their position in
# outbreaks.csv and then dropping every other column by hand.
rate_cols = ['case_rate', 'death_rate']
outbreak_rates = df_ob.set_index('federal_num')[rate_cols]

# Left join on the facility identifier: every facility that has deficiencies
# is kept; facilities missing from the outbreak data get NaN rates.
df = temp.join(outbreak_rates, how='left')

# Rearrange columns: rates first, then one column per tag group
cols = rate_cols + list(temp.columns)
df = df[cols]
df

Unnamed: 0,case_rate,death_rate,Administration,"Admission, Transfer and Discharge",Behavioral Health Services,Comprehensive Resident Centered Care Plans,Dental Services,Food Services Areas,"Freedom from Abuse, Neglect, and Exploitation",Infection Control,"Laboratory, Radiology, and Other Diagnostic Services",Nursing Services,Pharmacy Services,Physical Environment,Physician Services,Quality Assurance and Performance Improvement,Quality of Care,Quality of Life,Resident Assessment and Plan of Care,Resident Rights,Specialized Habilitative and Rehabilitative Services,Training Requirements
505004,0.685714,0.135714,3,0,0,4,0,2,0,4,0,1,4,3,0,0,3,1,1,3,0,0
505009,0.800000,0.173913,0,0,0,0,1,2,2,0,0,0,3,2,0,0,3,1,2,1,0,0
505010,1.178218,0.158416,0,1,0,4,0,2,6,3,0,2,7,0,1,0,10,0,3,7,0,0
505016,0.000000,0.000000,4,4,4,9,0,2,2,4,0,2,7,2,0,0,11,2,1,13,1,0
505017,0.448485,0.084848,7,5,3,8,1,1,4,5,0,3,3,2,1,1,13,6,4,17,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50A181,0.000000,0.000000,1,0,1,4,0,1,2,2,0,0,1,0,0,1,1,0,3,1,0,0
50A260,0.000000,0.000000,0,0,0,2,0,2,9,1,0,0,6,0,0,0,3,0,1,1,0,0
50A261,0.000000,0.000000,0,2,2,2,0,0,9,1,0,1,5,0,0,0,7,0,0,6,0,0
50A263,0.000000,0.000000,2,0,1,3,0,1,1,2,1,1,8,0,1,0,7,3,2,3,0,1


In [5]:
# Sanity check: deficiencies counted in the pivoted table vs. rows in the
# raw deficiency data.
# NOTE(review): the recorded run prints 11905 vs 11915; presumably rows with
# a missing facility_id or tag_group_name fall out of the pivot - TODO confirm.
tag_group_counts = df.drop(columns=['case_rate', 'death_rate'])
print(tag_group_counts.sum().sum())
print(df_sod.shape[0])

11905
11915


In [6]:
# Pairwise Pearson correlation (the pandas default) between the COVID rates
# and the per-tag-group deficiency counts.
corr_matrix = df.corr(method='pearson')

# Optional export, left disabled:
# df.corr().to_csv('/Users/mvilla/Downloads/correlation.csv')
corr_matrix

Unnamed: 0,case_rate,death_rate,Administration,"Admission, Transfer and Discharge",Behavioral Health Services,Comprehensive Resident Centered Care Plans,Dental Services,Food Services Areas,"Freedom from Abuse, Neglect, and Exploitation",Infection Control,"Laboratory, Radiology, and Other Diagnostic Services",Nursing Services,Pharmacy Services,Physical Environment,Physician Services,Quality Assurance and Performance Improvement,Quality of Care,Quality of Life,Resident Assessment and Plan of Care,Resident Rights,Specialized Habilitative and Rehabilitative Services,Training Requirements
case_rate,1.0,0.906333,-0.070711,-0.056832,-0.142905,-0.018919,-0.118708,-0.020874,-0.078452,-0.076916,-0.08330132,-0.09947,-0.099076,0.079813,-0.059486,-0.030005,-0.008539,-0.064206,0.003273,-0.017402,-0.07977456,0.036732
death_rate,0.906333,1.0,-0.006602,-0.038369,-0.128068,0.000219,-0.08405,-0.026875,-0.046149,-0.055469,-0.05032593,-0.044732,-0.088235,0.101443,-0.038512,-0.01882,0.008609,-0.026501,0.080962,-0.000697,-0.09432626,0.082267
Administration,-0.070711,-0.006602,1.0,0.404203,0.484614,0.54793,0.404329,0.355004,0.490118,0.522802,0.2868839,0.553841,0.48146,0.222454,0.040951,0.480932,0.545278,0.510839,0.536406,0.581787,0.1939449,0.48472
"Admission, Transfer and Discharge",-0.056832,-0.038369,0.404203,1.0,0.454058,0.41777,0.327545,0.286386,0.338664,0.463441,0.1976856,0.524624,0.400154,0.296736,0.038293,0.291639,0.500856,0.486341,0.296721,0.525489,0.2318414,0.331794
Behavioral Health Services,-0.142905,-0.128068,0.484614,0.454058,1.0,0.548805,0.434123,0.336238,0.466063,0.447504,0.2984653,0.521118,0.524926,0.177435,0.071303,0.507076,0.692809,0.482001,0.45231,0.60369,0.1106063,0.212397
Comprehensive Resident Centered Care Plans,-0.018919,0.000219,0.54793,0.41777,0.548805,1.0,0.422347,0.355924,0.464627,0.53067,0.3130432,0.610319,0.700166,0.139913,0.161591,0.438235,0.687549,0.5224,0.511102,0.635975,0.06354936,0.293552
Dental Services,-0.118708,-0.08405,0.404329,0.327545,0.434123,0.422347,1.0,0.418912,0.383232,0.352318,0.1832192,0.406017,0.453108,0.166509,0.060676,0.404357,0.538452,0.402956,0.530336,0.492833,0.1380437,0.274675
Food Services Areas,-0.020874,-0.026875,0.355004,0.286386,0.336238,0.355924,0.418912,1.0,0.383497,0.315381,0.1658824,0.333072,0.397464,0.161883,-0.015361,0.348626,0.483411,0.339175,0.42814,0.409374,0.2172573,0.20501
"Freedom from Abuse, Neglect, and Exploitation",-0.078452,-0.046149,0.490118,0.338664,0.466063,0.464627,0.383232,0.383497,1.0,0.437262,0.1091519,0.56682,0.44882,0.154952,0.109813,0.366467,0.588803,0.422656,0.42578,0.581643,0.05742133,0.276726
Infection Control,-0.076916,-0.055469,0.522802,0.463441,0.447504,0.53067,0.352318,0.315381,0.437262,1.0,0.2759952,0.586358,0.604232,0.188179,0.145694,0.362886,0.585075,0.472607,0.365997,0.619108,0.2376621,0.363179
