### HHS Hospital Data

In [1]:
import pandas as pd
import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt
%matplotlib inline
import json
import numpy as np
from altair import datum
import altair as alt
import shapely
import altair_latimes as lat
# Register the Los Angeles Times theme with Altair and make it the active one.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

# Raise pandas' display limits so wide and long frames render in full.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 1000)

# Turn off Altair's max-rows check for chart data (kept last so the cell
# still displays the returned registry object).
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
# Data dictionary: https://healthdata.gov/covid-19-reported-patient-impact-and-hospital-capacity-facility-data-dictionary
# https://healthdata.gov/dataset/covid-19-reported-patient-impact-and-hospital-capacity-facility

### Fetch the latest url to download the data

In [3]:
# Fetch the dataset's package record from healthdata.gov, then keep the
# 'resources' entry of the first result.
package_info = pd.read_json('https://healthdata.gov/api/3/action/package_show?id=d475cc4e-83cd-4c16-be57-9105f300e0bc&page=0')
metadata = package_info.result[0]['resources']

### Read the url into a dataframe

In [4]:
# Download the CSV behind the first listed resource.
# - FIPS and ZIP codes are read as strings (later cells compare fips_code
#   against the zero-padded string '06037').
# - collection_week is parsed as a date by name. The previous
#   `parse_dates=True` only attempts to parse the index, and
#   `infer_datetime_format` is deprecated in pandas 2.x.
src = pd.read_csv(
    metadata[0]['url'],
    dtype={'fips_code': str, 'zip': str},
    parse_dates=['collection_week'],
)

In [5]:
# Work on an independent deep copy so the downloaded frame `src` stays untouched.
hhs_hospitals = src.copy(deep=True)

### What's the most recent collection week?

In [6]:
# Convert the collection week to datetimes so it can be compared and max()'d.
collection_week = pd.to_datetime(hhs_hospitals['collection_week'])
hhs_hospitals['collection_week'] = collection_week

In [7]:
# Display the most recent collection week present in the data.
hhs_hospitals.collection_week.max()

Timestamp('2020-12-04 00:00:00')

### Get rid of the -999999 suppressed values

In [8]:
# Names of every numeric column; these are the ones that can carry the
# -999999 sentinel handled in the following cells.
cols = hhs_hospitals.select_dtypes(include=np.number).columns

In [9]:
# Replace every numeric cell equal to the -999999 sentinel with NaN.
is_suppressed = np.isclose(hhs_hospitals[cols].to_numpy(), -999999)
hhs_hospitals[cols] = hhs_hospitals[cols].mask(is_suppressed)

In [10]:
# Fill missing numeric values with 0 and cast the numeric columns to integers.
# The fill value is the number 0 rather than the string '0', so the columns
# stay numeric instead of passing through object dtype before the cast.
# NOTE(review): astype(int) truncates any fractional 7-day averages, and
# zero-filling makes blanked-out cells indistinguishable from true zeros.
# Confirm both are intended.
hhs_hospitals[cols] = hhs_hospitals[cols].fillna(0).astype(int)

In [11]:
# Preview the first five cleaned rows.
hhs_hospitals.head(n=5)

Unnamed: 0,hospital_pk,collection_week,state,ccn,hospital_name,address,city,zip,hospital_subtype,fips_code,is_metro_micro,total_beds_7_day_avg,all_adult_hospital_beds_7_day_avg,all_adult_hospital_inpatient_beds_7_day_avg,inpatient_beds_used_7_day_avg,all_adult_hospital_inpatient_bed_occupied_7_day_avg,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg,total_adult_patients_hospitalized_confirmed_covid_7_day_avg,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg,total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg,inpatient_beds_7_day_avg,total_icu_beds_7_day_avg,total_staffed_adult_icu_beds_7_day_avg,icu_beds_used_7_day_avg,staffed_adult_icu_bed_occupancy_7_day_avg,...,previous_day_admission_adult_covid_confirmed_7_day_sum,previous_day_admission_adult_covid_confirmed_18-19_7_day_sum,previous_day_admission_adult_covid_confirmed_20-29_7_day_sum,previous_day_admission_adult_covid_confirmed_30-39_7_day_sum,previous_day_admission_adult_covid_confirmed_40-49_7_day_sum,previous_day_admission_adult_covid_confirmed_50-59_7_day_sum,previous_day_admission_adult_covid_confirmed_60-69_7_day_sum,previous_day_admission_adult_covid_confirmed_70-79_7_day_sum,previous_day_admission_adult_covid_confirmed_80+_7_day_sum,previous_day_admission_adult_covid_confirmed_unknown_7_day_sum,previous_day_admission_pediatric_covid_confirmed_7_day_sum,previous_day_covid_ED_visits_7_day_sum,previous_day_admission_adult_covid_suspected_7_day_sum,previous_day_admission_adult_covid_suspected_18-19_7_day_sum,previous_day_admission_adult_covid_suspected_20-29_7_day_sum,previous_day_admission_adult_covid_suspected_30-39_7_day_sum,previous_day_admission_adult_covid_suspected_40-49_7_day_sum,previous_day_admission_adult_covid_suspected_50-59_7_day_sum,previous_day_admission_adult_covid_suspected_60-69_7_day_sum,previous_day_admission_adult_covid_suspected_70-79_7_day_sum,previous_day_admission_adult_covid_suspected_80+_7_day_sum,previous_day_admiss
ion_adult_covid_suspected_unknown_7_day_sum,previous_day_admission_pediatric_covid_suspected_7_day_sum,previous_day_total_ED_visits_7_day_sum,previous_day_admission_influenza_confirmed_7_day_sum
0,ff9032c172057190bf52f523d253c79af2d5f74d007084...,2020-12-04,PR,,HealthproMed,,,775,Short Term,,False,6,4,4,0,0,0,0,0,0,6,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0
1,ee04edd185865c38c839812cb2eb5ae5d3f8922e3b629e...,2020-12-04,LA,,Alexandria Emergency Hospital,5900 Coliseum Blvd,Alexandria,71303,Short Term,,False,14,12,12,0,0,0,0,0,0,14,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,aab2bb3ab769da90baf57242c96ec481afb5ec6a233784...,2020-12-04,LA,,Crescent City Surgical Centre,,,70118,Short Term,,False,10,10,10,7,7,0,0,0,0,10,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,df2363988746840134806aa7b2accf9c89259601776986...,2020-12-04,PR,,Hospital San Antonio,"#18 Norte, Calle Dr",Mayagüez,680,Short Term,,False,75,43,43,21,13,0,0,0,0,57,12,4,0,0,...,0,0,0,0,0,0,0,0,0,0,0,93,0,0,0,0,0,0,0,0,0,0,0,163,0
4,3b081d5ef1c552538e4af4aa593a857bb922a4f364a412...,2020-12-04,LA,,Surgery Center of Zachary,,,70791,Short Term,,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


---

### Calculate some hospitalization/Covid rates

In [12]:
# Shorter working names for the four columns used by the rate calculations.
# Note that 'total_beds_avg' is the renamed *occupied* adult inpatient bed average.
short_names = {
    'total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg': 'covid_patients_avg',
    'all_adult_hospital_inpatient_bed_occupied_7_day_avg': 'total_beds_avg',
    'icu_beds_used_7_day_avg': 'used_icu_beds_avg',
    'total_icu_beds_7_day_avg': 'total_icu_beds_avg',
}
hhs_hospitals = hhs_hospitals.rename(columns=short_names)

#### What share of hospitalized patients have Covid?

In [13]:
# Covid patients as a percentage of occupied adult inpatient beds, rounded to
# two decimals. The rate is only computed where both the numerator and the
# denominator are positive; all other rows get NaN. Guarding the denominator
# matters because a facility reporting Covid patients but zero occupied beds
# would otherwise yield `inf`, which sorts ahead of every real rate and is not
# valid in the GeoJSON export.
covid_patients = hhs_hospitals['covid_patients_avg']
occupied_beds = hhs_hospitals['total_beds_avg']
has_covid_rate = (covid_patients > 0) & (occupied_beds > 0)
hhs_hospitals['covid_rate'] = (
    covid_patients.where(has_covid_rate) / occupied_beds.where(has_covid_rate) * 100
).round(2)

#### What share of I.C.U. beds is in use at each facility?

In [14]:
# Used ICU beds as a percentage of total ICU beds, rounded to two decimals.
# The column keeps the name 'icu_capacity' because later cells select and
# sort on it. The rate is only computed where both used and total ICU beds
# are positive; all other rows get NaN. Without the denominator guard, a
# facility reporting used ICU beds but zero total ICU beds would yield `inf`.
used_icu_beds = hhs_hospitals['used_icu_beds_avg']
total_icu_beds = hhs_hospitals['total_icu_beds_avg']
has_icu_rate = (used_icu_beds > 0) & (total_icu_beds > 0)
hhs_hospitals['icu_capacity'] = (
    used_icu_beds.where(has_icu_rate) / total_icu_beds.where(has_icu_rate) * 100
).round(2)

---

### Filter the dataframe for just California and Los Angeles County facilities

In [15]:
# Keep only the rows reported for California.
is_california = hhs_hospitals['state'].eq('CA')
ca = hhs_hospitals[is_california]

In [16]:
# Narrow the California rows to FIPS code 06037 (Los Angeles County).
in_la_county = ca['fips_code'].eq('06037')
la = ca[in_la_county]

### Filter the California dataframe to the most recent collection week

In [17]:
# California rows from the latest collection week, ordered from most to
# fewest confirmed adult Covid patients.
latest_ca_week = ca['collection_week'].max()
current_ca = (
    ca[ca['collection_week'] == latest_ca_week]
    .sort_values('total_adult_patients_hospitalized_confirmed_covid_7_day_avg', ascending=False)
)

In [18]:
# L.A. County rows from the latest collection week, ordered from most to
# fewest confirmed adult Covid patients.
latest_la_week = la['collection_week'].max()
current_la = (
    la[la['collection_week'] == latest_la_week]
    .sort_values('total_adult_patients_hospitalized_confirmed_covid_7_day_avg', ascending=False)
)

### Which facilities in L.A. County currently have the most Covid patients?

In [19]:
# Show the five L.A. County facilities with the most confirmed adult Covid patients.
(
    current_la
    .sort_values(by='total_adult_patients_hospitalized_confirmed_covid_7_day_avg', ascending=False)
    .head(5)
)

Unnamed: 0,hospital_pk,collection_week,state,ccn,hospital_name,address,city,zip,hospital_subtype,fips_code,is_metro_micro,total_beds_7_day_avg,all_adult_hospital_beds_7_day_avg,all_adult_hospital_inpatient_beds_7_day_avg,inpatient_beds_used_7_day_avg,total_beds_avg,covid_patients_avg,total_adult_patients_hospitalized_confirmed_covid_7_day_avg,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg,total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg,inpatient_beds_7_day_avg,total_icu_beds_avg,total_staffed_adult_icu_beds_7_day_avg,used_icu_beds_avg,staffed_adult_icu_bed_occupancy_7_day_avg,...,previous_day_admission_adult_covid_confirmed_20-29_7_day_sum,previous_day_admission_adult_covid_confirmed_30-39_7_day_sum,previous_day_admission_adult_covid_confirmed_40-49_7_day_sum,previous_day_admission_adult_covid_confirmed_50-59_7_day_sum,previous_day_admission_adult_covid_confirmed_60-69_7_day_sum,previous_day_admission_adult_covid_confirmed_70-79_7_day_sum,previous_day_admission_adult_covid_confirmed_80+_7_day_sum,previous_day_admission_adult_covid_confirmed_unknown_7_day_sum,previous_day_admission_pediatric_covid_confirmed_7_day_sum,previous_day_covid_ED_visits_7_day_sum,previous_day_admission_adult_covid_suspected_7_day_sum,previous_day_admission_adult_covid_suspected_18-19_7_day_sum,previous_day_admission_adult_covid_suspected_20-29_7_day_sum,previous_day_admission_adult_covid_suspected_30-39_7_day_sum,previous_day_admission_adult_covid_suspected_40-49_7_day_sum,previous_day_admission_adult_covid_suspected_50-59_7_day_sum,previous_day_admission_adult_covid_suspected_60-69_7_day_sum,previous_day_admission_adult_covid_suspected_70-79_7_day_sum,previous_day_admission_adult_covid_suspected_80+_7_day_sum,previous_day_admission_adult_covid_suspected_unknown_7_day_sum,previous_day_admission_pediatric_covid_suspected_7_day_sum,previous_day_total_ED_visits_7_day_sum,previous_day_admission_influenza_confirmed_7_day_sum,covid_rate,icu_capacity
377,50625,2020-12-04,CA,50625,CEDARS-SINAI MEDICAL CENTER,8700 BEVERLY BLVD,LOS ANGELES,90048,Short Term,6037,True,871,829,829,815,804,134,114,0,0,871,165,108,124,105,...,4,11,5,14,19,10,12,0,0,171,40,0,0,0,0,5,10,10,7,0,0,1415,0,16.67,75.15
413,50231,2020-12-04,CA,50231,POMONA VALLEY HOSPITAL MEDICAL CENTER,1798 N GAREY AVE,POMONA,91767,Short Term,6037,True,413,379,359,286,283,106,106,0,0,393,113,60,96,58,...,7,11,11,22,34,27,20,0,0,532,0,0,0,0,0,0,0,0,0,0,0,1670,0,37.46,84.96
358,50139,2020-12-04,CA,50139,KAISER FOUNDATION HOSPITAL - DOWNEY,9333 IMPERIAL HIGHWAY,DOWNEY,90242,Short Term,6037,True,448,382,304,222,212,106,105,0,0,370,75,30,52,28,...,13,11,21,31,30,24,16,0,0,819,9,0,0,0,0,0,0,0,0,0,0,1761,0,50.0,69.33
416,50373,2020-12-04,CA,50373,LAC+USC MEDICAL CENTER,"1200 N STATE ST, ROOM C2K100",LOS ANGELES,90033,Short Term,6037,True,648,606,606,539,511,106,98,0,0,648,132,107,115,92,...,10,0,18,23,27,14,6,0,0,276,15,0,0,0,4,0,0,0,0,0,0,2281,0,20.74,87.12
366,50169,2020-12-04,CA,50169,PIH HEALTH HOSPITAL-WHITTIER,12401 WASHINGTON BLVD,WHITTIER,90602,Short Term,6037,True,437,437,415,286,286,92,90,0,0,415,82,48,46,33,...,0,7,7,9,17,13,19,0,0,415,0,0,0,0,0,0,0,0,0,0,0,1241,0,32.17,56.1


---

### Get medical facilities' geographic data

In [20]:
# From HHS: https://maps3.arcgisonline.com/arcgis/rest/services/A-16/HHS_IOM_Health_Resources/MapServer

In [21]:
# Load the HHS hospital points from a local GeoJSON file.
# NOTE(review): this is an absolute path on one machine, so the notebook will
# not run elsewhere without editing it.
hospitals_geojson = '/Users/mhustiles/data/github/AGStoShapefile/backupdir/HHS_Hospital/HHS_Hospital_1608139617293.geojson'
hospitals = gpd.read_file(hospitals_geojson)

In [22]:
# Load the HHS medical-center points from a local GeoJSON file.
# NOTE(review): this is an absolute path on one machine, so the notebook will
# not run elsewhere without editing it.
medical_centers_geojson = '/Users/mhustiles/data/github/AGStoShapefile/backupdir/HHS_Medical_Center/HHS_Medical_Center_1608139616289.geojson'
medical_centers = gpd.read_file(medical_centers_geojson)

### Clean up the column names so we can merge the facilities dataframes

In [23]:
# Collect both facility frames for the later pd.concat. The list holds
# references to the same objects, so the in-place column renames in the next
# two cells are visible through it; rebinding `hospitals` or `medical_centers`
# to new frames instead would leave this list pointing at the un-renamed ones.
facilities = [hospitals, medical_centers]

In [24]:
# Map the hospital layer's source column names onto the shared schema.
hospital_column_names = {
    'Name_new': 'name',
    'Address_1': 'address',
    'City': 'city',
    'State_1': 'state',
    'ZipCode': 'zipcode',
    'PhoneNum': 'phone',
    'County_Nam': 'county',
    'Provider_N': 'provider_id',
    'Hospital_T': 'type',
    'Hospital_O': 'operation',
    'Emergency_': 'emergency',
}
# Renamed in place on purpose: the `facilities` list built earlier refers to
# this same object and must see the new column names.
hospitals.rename(columns=hospital_column_names, inplace=True)

In [25]:
# Map the medical-center layer's source column names onto the shared schema.
medical_center_column_names = {
    'Hospital_N': 'name',
    'Address1': 'address',
    'City_1': 'city',
    'State_1': 'state',
    'ZipCode': 'zipcode',
    'PhoneNum': 'phone',
    'County_Nam': 'county',
    'Provider_N': 'provider_id',
    'Hospital_T': 'type',
    'Hospital_O': 'operation',
    'Emergency_': 'emergency',
}
# Renamed in place on purpose: the `facilities` list built earlier refers to
# this same object and must see the new column names.
medical_centers.rename(columns=medical_center_column_names, inplace=True)

### Concatenate the different facility types into one dataframe, and also filter that just to CA

In [26]:
# Stack the facility frames into one. Each frame keeps its own index labels,
# so labels may repeat in the result.
all_medical_geo = pd.concat(objs=facilities)

In [27]:
# California-only subset of the combined facility geography.
# NOTE(review): the merge cell that follows joins on `all_medical_geo`, not on
# this filtered frame. Confirm which one is intended.
geo_in_california = all_medical_geo['state'].eq('CA')
all_medical_geo_ca = all_medical_geo[geo_in_california]

---

### Merge with our CA medical facilities

In [28]:
# Attach geography to the current California figures by matching the geo
# layer's provider_id to the HHS ccn. This is an inner join, so rows without
# a match on either side are dropped.
current_ca_geo = all_medical_geo.merge(
    current_ca,
    how='inner',
    left_on='provider_id',
    right_on='ccn',
)

In [29]:
# How many facilities survived the merge?
current_ca_geo.shape[0]

301

In [30]:
# Preview the first five merged rows.
current_ca_geo.head(n=5)

Unnamed: 0,OBJECTID,name,address_x,city_x,state_x,zipcode,phone,county,provider_id,type,operation,emergency,geometry,hospital_pk,collection_week,state_y,ccn,hospital_name,address_y,city_y,zip,hospital_subtype,fips_code,is_metro_micro,total_beds_7_day_avg,...,previous_day_admission_adult_covid_confirmed_20-29_7_day_sum,previous_day_admission_adult_covid_confirmed_30-39_7_day_sum,previous_day_admission_adult_covid_confirmed_40-49_7_day_sum,previous_day_admission_adult_covid_confirmed_50-59_7_day_sum,previous_day_admission_adult_covid_confirmed_60-69_7_day_sum,previous_day_admission_adult_covid_confirmed_70-79_7_day_sum,previous_day_admission_adult_covid_confirmed_80+_7_day_sum,previous_day_admission_adult_covid_confirmed_unknown_7_day_sum,previous_day_admission_pediatric_covid_confirmed_7_day_sum,previous_day_covid_ED_visits_7_day_sum,previous_day_admission_adult_covid_suspected_7_day_sum,previous_day_admission_adult_covid_suspected_18-19_7_day_sum,previous_day_admission_adult_covid_suspected_20-29_7_day_sum,previous_day_admission_adult_covid_suspected_30-39_7_day_sum,previous_day_admission_adult_covid_suspected_40-49_7_day_sum,previous_day_admission_adult_covid_suspected_50-59_7_day_sum,previous_day_admission_adult_covid_suspected_60-69_7_day_sum,previous_day_admission_adult_covid_suspected_70-79_7_day_sum,previous_day_admission_adult_covid_suspected_80+_7_day_sum,previous_day_admission_adult_covid_suspected_unknown_7_day_sum,previous_day_admission_pediatric_covid_suspected_7_day_sum,previous_day_total_ED_visits_7_day_sum,previous_day_admission_influenza_confirmed_7_day_sum,covid_rate,icu_capacity
0,107,Watsonville Community Hospital,75 Nielson Street,Watsonville,CA,95076,831-724-4741,Santa Cruz,50194,Acute Care,Proprietary,Yes,POINT (-121.72403 36.91659),50194,2020-12-04,CA,50194,WATSONVILLE COMMUNITY HOSPITAL,75 NIELSON STREET,WATSONVILLE,95076,Short Term,6087,True,100,...,0,0,0,0,0,4,0,0,0,115,16,0,0,0,0,0,0,0,0,0,0,412,0,56.76,43.75
1,108,Dominican Hospital,1555 Soquel Drive,Santa Cruz,CA,95065,831-462-7700,Santa Cruz,50242,Acute Care,Voluntary non-profit - Church,Yes,POINT (-121.98426 36.98825),50242,2020-12-04,CA,50242,DOMINICAN HOSPITAL,1555 SOQUEL DRIVE,SANTA CRUZ,95065,Short Term,6087,True,248,...,0,0,5,0,7,6,4,0,0,289,0,0,0,0,0,0,0,0,0,0,0,808,0,16.78,53.33
2,109,Patients' Hospital Of Redding,2900 Eureka Way,Redding,CA,96001,530-225-8700,Shasta,50697,Acute Care,Proprietary,Yes,POINT (-122.41630 40.58554),50697,2020-12-04,CA,50697,PATIENTS' HOSPITAL OF REDDING,2900 EUREKA WAY,REDDING,96001,Short Term,6089,True,10,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
3,110,Mayers Memorial Hospital,43563 Hwy 299 East,Fall River Mills,CA,96028,530-336-5511,Shasta,51305,Critical Access,Government - Hospital District or Authority,Yes,POINT (-121.42217 41.02665),51305,2020-12-04,CA,51305,MAYERS MEMORIAL HOSPITAL,43563 HWY 299 EAST,FALL RIVER MILLS,96028,Critical Access Hospitals,6089,True,16,...,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,56,0,,
4,111,Kaiser Foundation Hospital Vallejo,975 Sereno Dr,Vallejo,CA,94589,707-651-1000,Solano,50073,Acute Care,Voluntary non-profit - Other,Yes,POINT (-122.24761 38.12838),50073,2020-12-04,CA,50073,KAISER FOUNDATION HOSPITAL AND REHAB CENTER,975 SERENO DR,VALLEJO,94589,Short Term,6095,True,179,...,4,6,0,7,9,5,0,0,0,86,11,0,0,0,0,0,0,0,4,0,0,982,0,22.15,88.89


In [31]:
# Keep the geo layer's city/address/state (the `_x` columns from the merge)
# under their plain names.
geo_side_names = {'city_x': 'city', 'address_x': 'address', 'state_x': 'state'}
current_ca_geo = current_ca_geo.rename(columns=geo_side_names)

In [32]:
# Discard the HHS-side duplicates (the `_y` columns from the merge).
current_ca_geo = current_ca_geo.drop(columns=['state_y', 'address_y', 'city_y'])

---

### Which hospitals in the state and L.A. County have the highest Covid patient rate?

In [33]:
# Slim L.A. County (FIPS 06037) frame: only the reporting columns plus
# geometry, ordered by ICU rate from highest to lowest.
la_slim_columns = [
    'collection_week', 'hospital_name', 'city', 'covid_patients_avg',
    'total_beds_avg', 'covid_rate', 'icu_capacity', 'geometry',
]
la_rows = current_ca_geo['fips_code'] == '06037'
current_la_geo_slim = current_ca_geo[la_rows][la_slim_columns].sort_values('icu_capacity', ascending=False)

In [34]:
# Slim statewide frame: only the reporting columns plus geometry, ordered by
# ICU rate from highest to lowest.
ca_slim_columns = [
    'collection_week', 'hospital_name', 'city', 'covid_patients_avg',
    'total_beds_avg', 'covid_rate', 'icu_capacity', 'geometry',
]
current_ca_geo_slim = current_ca_geo[ca_slim_columns].sort_values('icu_capacity', ascending=False)

### California

In [35]:
# Ten California facilities with the highest Covid patient rate.
current_ca_geo_slim.sort_values(by='covid_rate', ascending=False).head(n=10)

Unnamed: 0,collection_week,hospital_name,city,covid_patients_avg,total_beds_avg,covid_rate,icu_capacity,geometry
123,2020-12-04,BARSTOW COMMUNITY HOSPITAL,Barstow,34,30,113.33,87.5,POINT (-117.01964 34.89409)
130,2020-12-04,MONTCLAIR HOSPITAL MEDICAL CENTER,Montclair,30,43,69.77,90.0,POINT (-117.69643 34.07766)
129,2020-12-04,DESERT VALLEY HOSPITAL,Victorville,85,123,69.11,95.24,POINT (-117.29803 34.47091)
31,2020-12-04,ADVENTIST HEALTH TEHACHAPI VALLEY,Tehachapi,11,16,68.75,,POINT (-118.44822 35.13002)
127,2020-12-04,KAISER FOUNDATION HOSPITAL FONTANA,Fontana,226,385,58.7,88.39,POINT (-117.43560 34.07220)
198,2020-12-04,EL CENTRO REGIONAL MEDICAL CENTER,El Centro,73,125,58.4,95.45,POINT (-115.49811 32.78143)
69,2020-12-04,GREATER EL MONTE COMMUNITY HOSPITAL,South El Monte,25,43,58.14,100.0,POINT (-118.04251 34.04916)
79,2020-12-04,EAST LOS ANGELES DOCTORS HOSPITAL,Los Angeles,22,38,57.89,70.0,POINT (-118.18379 34.02388)
128,2020-12-04,VICTOR VALLEY GLOBAL MEDICAL CENTER,Victorville,55,96,57.29,100.0,POINT (-117.29253 34.52814)
0,2020-12-04,WATSONVILLE COMMUNITY HOSPITAL,Watsonville,21,37,56.76,43.75,POINT (-121.72403 36.91659)


### L.A. County

In [36]:
# Ten L.A. County facilities with the highest Covid patient rate.
current_la_geo_slim.sort_values(by='covid_rate', ascending=False).head(n=10)

Unnamed: 0,collection_week,hospital_name,city,covid_patients_avg,total_beds_avg,covid_rate,icu_capacity,geometry
69,2020-12-04,GREATER EL MONTE COMMUNITY HOSPITAL,South El Monte,25,43,58.14,100.0,POINT (-118.04251 34.04916)
79,2020-12-04,EAST LOS ANGELES DOCTORS HOSPITAL,Los Angeles,22,38,57.89,70.0,POINT (-118.18379 34.02388)
75,2020-12-04,EMANATE HEALTH FOOTHILL PRESBYTERIAN HOSPITAL,Glendora,38,70,54.29,77.78,POINT (-117.87253 34.13286)
43,2020-12-04,KAISER FOUNDATION HOSPITAL - DOWNEY,Downey,106,212,50.0,69.33,POINT (-118.12902 33.91716)
210,2020-12-04,SAN GABRIEL VALLEY MEDICAL CENTER,San Gabriel,41,85,48.24,64.0,POINT (-118.10595 34.10246)
49,2020-12-04,COMMUNITY HOSPITAL OF HUNTINGTON PARK,Huntington Park,26,54,48.15,100.0,POINT (-118.22414 33.98917)
72,2020-12-04,KAISER FOUNDATION HOSPITAL - BALDWIN PARK,Baldwin Park,82,173,47.4,38.37,POINT (-117.98481 34.06447)
54,2020-12-04,PALMDALE REGIONAL MEDICAL CENTER,Lancaster,62,134,46.27,91.67,POINT (-118.14790 34.67903)
74,2020-12-04,SAN DIMAS COMMUNITY HOSPITAL,San Dimas,20,44,45.45,87.5,POINT (-117.83436 34.09910)
58,2020-12-04,BEVERLY HOSPITAL,Montebello,52,116,44.83,64.71,POINT (-118.10018 34.01596)


---

### Export data

#### Export current CA geography with rates

In [37]:
# Write the slim L.A. County frame to GeoJSON under the relative output/ folder.
la_geojson_path = 'output/current_la_geo_slim.geojson'
current_la_geo_slim.to_file(la_geojson_path, driver='GeoJSON')

In [38]:
# Write the slim statewide frame to GeoJSON under the relative output/ folder.
ca_geojson_path = 'output/current_ca_geo_slim.geojson'
current_ca_geo_slim.to_file(ca_geojson_path, driver='GeoJSON')