In [13]:
import numpy as np
import pandas as pd
# Show complete DataFrames when displaying them: no limit on the number of
# rows or columns, and no fixed line width.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# Never truncate cell contents. `None` is the supported "unlimited" value on
# pandas >= 1.0, and recent pandas releases reject the old -1 sentinel with a
# ValueError. pandas < 1.0 only accepts an integer here, so fall back to -1
# when None is refused.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)


In [14]:
# Load the complete BC long-term-care table from its CSV file.
# (Call ltc_com.info() or iterate over ltc_com.columns.values to inspect
# the available columns.)
ltc_com = pd.read_csv(filepath_or_buffer='../data/BC/bc_ltc_complete.csv')


# Filter Homes
## Special Unit homes

- The British Columbia Long-Term Care Quick Facts Directory contains information on 294 Long-term care facilities
- Three facilities have special care units that are identified in the Directory with separate data. These are not normally counted as separate facilities.
- Special Units **do not** have complaints, incident or inspections/infractions data
- Special Units **do** have different values for **HCC_CODE, open_date, DCH, number of beds and demographic data**
- Special care units at Berkley Care Centre, Harmony Court Care Centre and Fair Haven - Vancouver are not counted separately in overall facility counts; however, they are counted separately for this direct care hours analysis.
    - Berkley Care Centre - Special Unit	
    - Fair Haven - Vancouver - Special Unit	
    - Harmony Court Care Centre - Special Unit
    
### Possible Actions
1. Remove 3 Special Units
2. **Merge Special Unit Homes with their parent home**
    - Weighted average of DCH and demographic data
    - Sum the number of beds/rooms
    - Keep complaints, incident and inspections/infractions etc. data from parent home

In [15]:
# Build the working frame: cells matching the pattern 'suppressed' become NaN.
# replace() returns a new DataFrame, so ltc_com itself is left unchanged.
ltc_fil = ltc_com.replace(to_replace='suppressed', value=np.nan, regex=True)

In [16]:
# Demographic columns may carry a trailing '%' sign; strip it and parse the
# remainder as numbers. Anything that cannot be parsed becomes NaN.
demographic_cols = [
    'AGE', 'FEMALE', 'AGE_85_PLUS', 'AGE_UNDER_65', 'STAY_LENGTH',
    'DEPRESSION', 'ADL_DEPENDENT', 'CPS_SEVERE', 'DEMENTIA',
    'ABS_PHYS_ABUSIVE', 'ISE', 'ISE_LOW', 'CMI', 'THERAPY_PT',
    'THERAPY_RT', 'THERAPY_OT', 'MEDS_DEPRESSION',
    'MEDS_ANTIPSYCHOTICS', 'RESTRAINTS',
]

for col in demographic_cols:
    without_percent = ltc_fil[col].astype(str).str.rstrip('%')
    ltc_fil[col] = pd.to_numeric(without_percent, errors='coerce')

In [17]:
# Merge each Special Unit into its parent home by appending one combined row
# per home.
#
# Each entry is (index label of the new merged row,
#                row position of the parent home,
#                row position of its Special Unit).
# The positions are hard-coded against the row order of the loaded CSV.
# NOTE(review): re-verify them whenever the source file is regenerated.
merge_plan = [
    (297, 109, 110),  # Harmony Court
    (298, 15, 16),    # Berkley
    (299, 81, 82),    # Fair Haven - Vancouver
]

# Start every merged row as a copy of the parent row. Columns that are not
# recomputed below therefore keep the parent home's values.
for new_label, parent_pos, _unit_pos in merge_plan:
    ltc_fil.loc[new_label] = ltc_fil.iloc[parent_pos]

# Bed/room counts are summed across the parent home and its Special Unit.
count_cols = ['BEDS_TOTAL', 'ROOMS_PRIVATE', 'ROOMS_SEMI', 'ROOMS_MULTI']

# DCH and demographic predictors are combined as a bed-weighted average.
weighted_cols = [
    'DCH_NURSE_LASTYR', 'DCH_ALLIED_LASTYR', 'DCH_TOTAL_LASTYR',
    'DCH_NURSE_CURRENTYR', 'DCH_ALLIED_CURRENTYR', 'DCH_TOTAL_CURRENTYR',
    'AGE', 'FEMALE', 'AGE_85_PLUS', 'AGE_UNDER_65', 'STAY_LENGTH',
    'DEPRESSION', 'ADL_DEPENDENT', 'CPS_SEVERE', 'DEMENTIA',
    'ABS_PHYS_ABUSIVE', 'ISE', 'ISE_LOW', 'CMI', 'THERAPY_PT',
    'THERAPY_RT', 'THERAPY_OT', 'MEDS_DEPRESSION',
    'MEDS_ANTIPSYCHOTICS', 'RESTRAINTS', 'BEDS_PRIVATEprop',
]

for new_label, parent_pos, unit_pos in merge_plan:
    # Snapshot the two source rows once; they are not modified in this cell.
    parent = ltc_fil.iloc[parent_pos]
    unit = ltc_fil.iloc[unit_pos]

    for col in count_cols:
        ltc_fil.loc[new_label, col] = parent[col] + unit[col]

    # Read the merged bed total back by LABEL. The new rows were created with
    # .loc[new_label], so reading them with .iloc[new_label] would only hit
    # the same row when the index is a default 0..n-1 range of the right size.
    total_beds = ltc_fil.loc[new_label, 'BEDS_TOTAL']

    for col in weighted_cols:
        ltc_fil.loc[new_label, col] = (
            parent[col]*parent['BEDS_TOTAL']/total_beds
            + unit[col]*unit['BEDS_TOTAL']/total_beds
        )
    



In [18]:
# Remove the six pre-merge source rows. Row positions are translated to index
# labels first, because DataFrame.drop removes rows by label.
# NOTE: ltc_fil is rebound to the result, so running this cell a second time
# would remove six further rows.
source_positions = [109, 110, 15, 16, 81, 82]
ltc_fil = ltc_fil.drop(ltc_fil.index[source_positions])

ltc_fil.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 294 entries, 0 to 299
Data columns (total 78 columns):
FACILITY_NAME              294 non-null object
HCC_CODE                   294 non-null object
HLTH_AUTH                  294 non-null object
OWNERSHIP                  294 non-null object
STREET_ADDRESS             294 non-null object
CITY                       294 non-null object
POSTAL                     294 non-null object
REGULATION                 294 non-null object
ACCRED_STATUS              294 non-null object
DCH_NURSE_LASTYR           290 non-null float64
DCH_ALLIED_LASTYR          290 non-null float64
DCH_TOTAL_LASTYR           290 non-null float64
DCH_NURSE_CURRENTYR        291 non-null float64
DCH_ALLIED_CURRENTYR       291 non-null float64
DCH_TOTAL_CURRENTYR        291 non-null float64
BEDS_TOTAL                 294 non-null int64
ROOMS_PRIVATE              294 non-null int64
ROOMS_SEMI                 294 non-null int64
ROOMS_MULTI                294 non-null int64


In [19]:
# Show any remaining rows whose facility name contains the literal text
# 'Special Unit' (missing names count as non-matches).
is_special_unit = ltc_fil['FACILITY_NAME'].str.contains('Special Unit', regex=False, na=False)
ltc_fil[is_special_unit]

Unnamed: 0,FACILITY_NAME,HCC_CODE,HLTH_AUTH,OWNERSHIP,STREET_ADDRESS,CITY,POSTAL,REGULATION,ACCRED_STATUS,DCH_NURSE_LASTYR,DCH_ALLIED_LASTYR,DCH_TOTAL_LASTYR,DCH_NURSE_CURRENTYR,DCH_ALLIED_CURRENTYR,DCH_TOTAL_CURRENTYR,BEDS_TOTAL,ROOMS_PRIVATE,ROOMS_SEMI,ROOMS_MULTI,COMPLAINTS,SUB_COMPLAINTS,INCIDENT_OUTBREAK,INCIDENT_ABUSE,INCIDENT_FALL,INCIDENT_POISON,INCIDENT_MEDICATION,INCIDENT_WANDERING,INCIDENT_INJURY,INCIDENT_AGGRESSION,INCIDENT_OUTBREAK_100,INCIDENT_ABUSE_100,INCIDENT_FALL_100,INCIDENT_POISON_100,INCIDENT_MEDICATION_100,INCIDENT_WANDERING_100,INCIDENT_INJURY_100,INCIDENT_AGGRESSION_100,AGE,FEMALE,AGE_85_PLUS,AGE_UNDER_65,STAY_LENGTH,DEPRESSION,ADL_DEPENDENT,CPS_SEVERE,DEMENTIA,ABS_PHYS_ABUSIVE,ISE,ISE_LOW,CMI,THERAPY_PT,THERAPY_RT,THERAPY_OT,MEDS_DEPRESSION,MEDS_ANTIPSYCHOTICS,RESTRAINTS,INSPECTIONS,INFRACTIONS,INFRACTIONS_LICENSING,INFRACTIONS_FACILITY,INFRACTIONS_STAFFING,INFRACTIONS_POLICY,INFRACTIONS_CARE,INFRACTIONS_DISEASE,INFRACTIONS_FOOD,INFRACTIONS_MEDICATION,INFRACTIONS_PROGRAM,INFRACTIONS_REPORTING,INFRACTIONS_RIGHTS,INFRACTIONS_OTHER,latitude,longitude,Total Confirmed Cases,Total Deaths,outbreak,RESIDENT_COUNCIL,FAMILY_COUNCIL,BEDS_PRIVATEprop


## Homes with suppressed data

Data has been suppressed for facilities with 5 beds or fewer:
1. Bella Coola General Hospital (VCHA)
2. Mackenzie & District Hospital and Health Centre (NHA)
3. Northern Haida Gwaii Hospital and Health Centre (NHA)
4. R.W. Large Memorial Hospital (6 beds but suppressed at VCHA's request)

Suppressed data include:
    
    COMPLAINTS
    SUB_COMPLAINTS
    
    INCIDENT_OUTBREAK
    INCIDENT_ABUSE
    INCIDENT_FALL
    INCIDENT_POISON
    INCIDENT_MEDICATION
    INCIDENT_WANDERING
    INCIDENT_INJURY
    INCIDENT_AGGRESSION
    INCIDENT_OUTBREAK_100
    INCIDENT_ABUSE_100
    INCIDENT_FALL_100
    INCIDENT_POISON_100
    INCIDENT_MEDICATION_100
    INCIDENT_WANDERING_100
    INCIDENT_INJURY_100
    INCIDENT_AGGRESSION_100
    
    AGE
    FEMALE
    AGE_85_PLUS
    AGE_UNDER_65
    STAY_LENGTH
    DEPRESSION
    ADL_DEPENDENT
    CPS_SEVERE
    DEMENTIA
    ABS_PHYS_ABUSIVE
    ISE
    ISE_LOW
    CMI
    
    THERAPY_PT
    THERAPY_RT
    THERAPY_OT
    
    MEDS_DEPRESSION
    MEDS_ANTIPSYCHOTICS
    RESTRAINTS
    
### Actions:
   1. **Remove homes with suppressed data only (4 homes)**
   2. Remove homes with fewer than 8 beds (8 homes) or fewer than 11 beds (12 homes)

In [8]:
# Count the homes that have fewer than 11 beds.
small_homes = ltc_fil.loc[ltc_fil['BEDS_TOTAL'] < 11, ['FACILITY_NAME', 'BEDS_TOTAL']]
len(small_homes)


12

In [9]:
# Exclude the four homes whose data is suppressed, matching on the exact
# facility name. The result is stored under a new name; ltc_fil is unchanged.
suppressed_homes = [
    'Bella Coola General Hospital',
    'Mackenzie and District Hospital and Health Centre',
    'Northern Haida Gwaii Hospital and Health Centre',
    'R. W. Large Memorial Hospital',
]

is_suppressed_home = ltc_fil['FACILITY_NAME'].isin(suppressed_homes)
ltc_fil2 = ltc_fil[~is_suppressed_home]


## New facility with limited data
There is one new facility added this year. Cariboo Place opened on April 1, 2019. It is included for searchability, but will have no indicator data for 2018/19.

Missing data include:

    FOOD_COST_LASTYR
    FOOD_COST_CURRENTYR

    DCH_NURSE_LASTYR
    DCH_ALLIED_LASTYR
    DCH_TOTAL_LASTYR
    DCH_NURSE_CURRENTYR
    DCH_ALLIED_CURRENTYR
    DCH_TOTAL_CURRENTYR

    COMPLAINTS
    SUB_COMPLAINTS

    INCIDENT_OUTBREAK
    INCIDENT_ABUSE
    INCIDENT_FALL
    INCIDENT_POISON
    INCIDENT_MEDICATION
    INCIDENT_WANDERING
    INCIDENT_INJURY
    INCIDENT_AGGRESSION
    INCIDENT_OUTBREAK_100
    INCIDENT_ABUSE_100
    INCIDENT_FALL_100
    INCIDENT_POISON_100
    INCIDENT_MEDICATION_100
    INCIDENT_WANDERING_100
    INCIDENT_INJURY_100
    INCIDENT_AGGRESSION_100

    AGE
    FEMALE
    AGE_85_PLUS
    AGE_UNDER_65
    STAY_LENGTH
    DEPRESSION
    ADL_DEPENDENT
    CPS_SEVERE
    DEMENTIA
    ABS_PHYS_ABUSIVE
    ISE
    ISE_LOW
    CMI
    
    THERAPY_PT
    THERAPY_RT
    THERAPY_OT
    
    MEDS_DEPRESSION
    MEDS_ANTIPSYCHOTICS
    RESTRAINTS

    SURVEY_URL
    PER_DIEM_LASTYR
    PER_DIEM_CURRENTYR
    
### Action
Remove the new home

In [10]:
# Exclude the new Cariboo home. Every facility whose name contains "Cariboo"
# is matched; missing names count as non-matches.
# (To review the matched rows before removing them, display ltc_fil2[is_cariboo].)
is_cariboo = ltc_fil2['FACILITY_NAME'].str.contains("Cariboo", na=False)
ltc_fil2 = ltc_fil2[~is_cariboo]

ltc_fil2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 289 entries, 0 to 299
Data columns (total 78 columns):
FACILITY_NAME              289 non-null object
HCC_CODE                   289 non-null object
HLTH_AUTH                  289 non-null object
OWNERSHIP                  289 non-null object
STREET_ADDRESS             289 non-null object
CITY                       289 non-null object
POSTAL                     289 non-null object
REGULATION                 289 non-null object
ACCRED_STATUS              289 non-null object
DCH_NURSE_LASTYR           288 non-null float64
DCH_ALLIED_LASTYR          288 non-null float64
DCH_TOTAL_LASTYR           288 non-null float64
DCH_NURSE_CURRENTYR        289 non-null float64
DCH_ALLIED_CURRENTYR       289 non-null float64
DCH_TOTAL_CURRENTYR        289 non-null float64
BEDS_TOTAL                 289 non-null int64
ROOMS_PRIVATE              289 non-null int64
ROOMS_SEMI                 289 non-null int64
ROOMS_MULTI                289 non-null int64


In [11]:
# Keep only fully populated predictors: every column that still contains at
# least one missing value is dropped from the remaining dataset.
ltc_fil2 = ltc_fil2.dropna(axis='columns')
ltc_fil2.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 289 entries, 0 to 299
Data columns (total 36 columns):
FACILITY_NAME             289 non-null object
HCC_CODE                  289 non-null object
HLTH_AUTH                 289 non-null object
OWNERSHIP                 289 non-null object
STREET_ADDRESS            289 non-null object
CITY                      289 non-null object
POSTAL                    289 non-null object
REGULATION                289 non-null object
ACCRED_STATUS             289 non-null object
DCH_NURSE_CURRENTYR       289 non-null float64
DCH_ALLIED_CURRENTYR      289 non-null float64
DCH_TOTAL_CURRENTYR       289 non-null float64
BEDS_TOTAL                289 non-null int64
ROOMS_PRIVATE             289 non-null int64
ROOMS_SEMI                289 non-null int64
ROOMS_MULTI               289 non-null int64
INSPECTIONS               289 non-null float64
INFRACTIONS               289 non-null float64
INFRACTIONS_LICENSING     289 non-null float64
INFRACTIONS_FACILIT

In [12]:
# Write the filtered dataset to CSV, leaving out the index column.
ltc_fil2.to_csv(path_or_buf='../data/BC/ngan_bc_ltc_5homes.csv', index=False)