# COVID-19 and CA hospitals

### Load Python tools

In [1]:
import pandas as pd
import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt
%matplotlib inline
import json
import numpy as np
from altair import datum
import altair as alt
import altair_latimes as lat
# Register the theme shipped by altair_latimes under the name 'latimes' and make it the active Altair theme.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
# Raise the pandas display limits to 50 columns and 1000 rows.
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
# Turn off Altair's row-count limit for chart data.
alt.data_transformers.disable_max_rows()

WebDriverException: Message: 'chromedriver' executable needs to be in PATH. Please see https://sites.google.com/a/chromium.org/chromedriver/home


### Hospital building characteristics

In [None]:
#https://data.chhs.ca.gov/dataset/hospital-building-data
#Metadata: https://data.chhs.ca.gov/dataset/hospital-building-data/resource/cefc10e5-5071-4ca4-8b03-2249caf0d294
# Read the hospital-building CSV and replace missing cells with empty strings.
# NOTE(review): the dtype keys are lower_snake_case, but the headers are only
# normalized in the following cell — confirm these keys match the raw CSV headers.
buildings = pd.read_csv(
    'input/ca-oshpd-gachospital-building-03052020.csv',
    encoding='Latin-1',
    dtype={
        'year_completed': object,
        'building_code_year': object,
        'stories': int,
        'height_ft': int,
    },
).fillna('')

In [None]:
# Normalize the headers to lower_snake_case: trim whitespace, lowercase, turn spaces
# and hyphens into underscores, and drop parentheses.
# regex=False makes every replacement a literal match, so '(' and ')' are never
# interpreted as regex metacharacters and the result does not depend on the
# pandas version's default for `regex`.
buildings.columns = (
    buildings.columns.str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
)

In [None]:
# Preview the first rows of the buildings table.
buildings.head()

In [None]:
# Build point geometries from the longitude/latitude columns.
# NOTE(review): no CRS is assigned here — presumably the coordinates are WGS84; confirm.
buildings_geo = gpd.GeoDataFrame(
    buildings,
    geometry=gpd.points_from_xy(x=buildings['longitude'], y=buildings['latitude']),
)

In [None]:
# Quick map of the building points.
buildings_geo.plot()

--- 

### Facility listing

In [None]:
#https://data.chhs.ca.gov/dataset/healthcare-facility-locations/resource/0a0476ba-442c-40ff-97dc-dc840fa7e907
# Load the healthcare facility listing from the local Excel workbook (first sheet by default).
facilities = pd.read_excel('input/healthcare_facility_locations.xlsx')

In [None]:
# Normalize the headers to lower_snake_case: trim whitespace, lowercase, turn spaces
# and hyphens into underscores, and drop parentheses.
# regex=False makes every replacement a literal match, so '(' and ')' are never
# interpreted as regex metacharacters and the result does not depend on the
# pandas version's default for `regex`.
facilities.columns = (
    facilities.columns.str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
)

In [None]:
# Build point geometries from the longitude/latitude columns.
# NOTE(review): no CRS is assigned here — presumably the coordinates are WGS84; confirm.
facilities_geo = gpd.GeoDataFrame(
    facilities,
    geometry=gpd.points_from_xy(x=facilities['longitude'], y=facilities['latitude']),
)

In [None]:
# Quick map of the facility points.
facilities_geo.plot()

In [None]:
# Keep only the facilities whose type code is 'GACH'.
hospitals = facilities_geo[facilities_geo['fac_type_code'].eq('GACH')]

In [None]:
# Count hospitals per entity type to see which categories exist.
hospitals['entity_type_description'].value_counts()

In [None]:
# Preview the first rows of the hospital subset.
hospitals.head()

In [None]:
# Entity types that are treated as publicly owned hospitals.
public_list = [
    'COUNTY',
    'UC REGENT',
    'OTHER PUBLIC AGENCY',
    'CITY',
    'STATE AGENCY',
]

In [None]:
# Select the hospitals whose entity type is one of the public categories.
is_public = hospitals['entity_type_description'].isin(public_list)
public = hospitals[is_public]

In [None]:
# Save the public-hospital subset to CSV. index=False is not passed, so the
# DataFrame index is written as the first column.
public.to_csv('output/public_hospitals_from_chhs.csv')

In [None]:
# List the contact e-mail values of the public hospitals.
# NOTE(review): this puts contact details into the notebook output — consider
# clearing the output before sharing the notebook.
public.contact_email.tolist()

In [None]:
# Number of public hospitals.
len(public)

In [None]:
# Preview the first rows of the public-hospital subset.
public.head()

---

### Johns Hopkins data

In [None]:
# Cases: 
# https://services1.arcgis.com/0MSEUqKaxRlEPj5g/ArcGIS/rest/services/Coronavirus_2019_nCoV_Cases/FeatureServer/1

# Deaths: 
# https://services1.arcgis.com/0MSEUqKaxRlEPj5g/ArcGIS/rest/services/Coronavirus_2019_nCoV_Cases/FeatureServer/0

# Cases countries: 
# https://services1.arcgis.com/0MSEUqKaxRlEPj5g/ArcGIS/rest/services/Coronavirus_2019_nCoV_Cases/FeatureServer/2

# Cases/time: 
# https://services1.arcgis.com/0MSEUqKaxRlEPj5g/ArcGIS/rest/services/cases_time_v3/FeatureServer/0

---

### CHHS: Hospital Annual Utilization Report - 2018

In [None]:
# https://data.chhs.ca.gov/dataset/hospital-annual-utilization-report
# pivot table: https://data.chhs.ca.gov/dataset/hospital-annual-utilization-report/resource/4ee65e8c-ca6e-42d3-9aec-adcc56383856

In [None]:
# Load the 'Page 1-6' sheet of the hospital utilization workbook.
# NOTE(review): the section heading says 2018 but the file name starts with
# 'hosp19' — confirm which report year this file covers.
utilization = pd.read_excel('input/hosp19_util_data_final.xlsx', sheet_name='Page 1-6')

In [None]:
# Normalize the headers to lower_snake_case: trim whitespace, lowercase, turn spaces
# and hyphens into underscores, and drop parentheses.
# regex=False makes every replacement a literal match, so '(' and ')' are never
# interpreted as regex metacharacters and the result does not depend on the
# pandas version's default for `regex`.
utilization.columns = (
    utilization.columns.str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
)

In [None]:
# Show the first record vertically to inspect every column of the utilization table.
utilization.iloc[0]

In [None]:
# Rename the three 'ic_*' columns to 'icu_*' names (modifies `utilization` in place).
utilization.rename(
    columns={
        'ic_cen_days': 'icu_days',
        'ic_lic_bed_days': 'icu_bed_days',
        'ic_lic_beds': 'icu_beds',
    },
    inplace=True,
)

In [None]:
# Export the identifying columns plus ICU and total licensed bed counts, without the index.
utilization[
    [
        'fac_no',
        'fac_name',
        'fac_city',
        'county',
        'license_no',
        'fac_zip',
        'icu_beds',
        'tot_lic_beds',
    ]
].to_csv('output/hospital_utilization.csv', index=False)

In [None]:
# ICU days as a percentage of ICU bed-days, rounded to a whole number.
# Rows where icu_bed_days is 0 produce NaN or inf rather than raising.
utilization['icu_occupancy_rate'] = (
    utilization['icu_days']
    .div(utilization['icu_bed_days'])
    .mul(100)
    .round()
)

In [None]:
# Build point geometries from the longitude/latitude columns.
# NOTE(review): no CRS is assigned here — presumably the coordinates are WGS84; confirm.
utilization_geo = gpd.GeoDataFrame(
    utilization,
    geometry=gpd.points_from_xy(x=utilization['longitude'], y=utilization['latitude']),
)

In [None]:
# Quick map of the facilities in the utilization table.
utilization_geo.plot()

In [None]:
# Roll the facility rows up to one row per county: totals for days, bed-days and
# beds, and the plain (unweighted) mean of the facility-level occupancy rates.
utilization_counties = (
    utilization_geo
    .groupby(['county'])
    .agg({
        'icu_days': 'sum',
        'icu_bed_days': 'sum',
        'icu_beds': 'sum',
        'icu_occupancy_rate': 'mean',
    })
    .reset_index()
)

In [None]:
# Counties with the most ICU beds.
utilization_counties.sort_values(by='icu_beds', ascending=False).head()

In [None]:
# Export the county totals, writing missing values as empty strings.
# index=False is not passed, so the DataFrame index is written as the first column.
utilization_counties.fillna('').to_csv('output/icu_by_county.csv')

In [None]:
# Facilities that report at least one ICU bed.
icus = utilization_geo[utilization_geo['icu_beds'].gt(0)]
# Narrow to the name, county, ICU measures and geometry.
icus_slim = icus[
    [
        'fac_name',
        'county',
        'icu_beds',
        'icu_days',
        'icu_bed_days',
        'icu_occupancy_rate',
        'geometry',
    ]
]

In [None]:
# Facilities with the most ICU beds.
icus_slim.sort_values(by='icu_beds', ascending=False).head()

In [None]:
# Number of facilities with at least one ICU bed.
len(icus)

In [None]:
# Write the per-facility ICU table, including its geometry, to a GeoJSON file.
icus_slim.to_file('output/icus.geojson', driver='GeoJSON')

In [None]:
# Facilities with the most ICU beds (repeats the earlier preview).
icus_slim.sort_values(by='icu_beds', ascending=False).head()

In [None]:
# Also save the per-facility ICU table as CSV, without the index column.
icus_slim.to_csv('output/icu_by_facility_nu.csv', index=False)

### ICU bed rate per 100,000 residents

In [None]:
# Statewide ICU beds per 100,000 residents; 39512223 is the population denominator.
# basically one bed for every 5,350 residents not counting 50% occupancy rate already
(icus_slim.icu_beds.sum()*100000) / 39512223

### ICU bed rate per 100,000 residents older than 65 (5.6 million in CA)

In [None]:
# ICU beds per 100,000 residents older than 65.
# The 65+ population is the statewide total times the 65+ share:
# 39512223 * 0.143 is about 5.65 million, matching the 5.6 million in the heading.
# Dividing by 0.143 instead (as this cell previously did) inflates the denominator to
# about 276 million and yields the misleading "one bed per 37,500" figure.
# With one bed per ~5,350 residents overall, this works out to roughly one bed per
# ~765 residents over 65 (5,350 * 0.143) — re-run to confirm. The ~50% occupancy
# rate is still not accounted for.
(icus_slim.icu_beds.sum()*100000) / (39512223*0.143)

--- 

In [None]:
# Load the county population table, reading the identifier columns as strings.
county_pop = pd.read_csv(
    'input/processed/acs5_2018_population_counties.csv',
    dtype={'geoid': 'object', 'state': 'object', 'county': 'object'},
)

In [None]:
# Keep only the rows whose state code is '06', as an independent frame that the
# following cells can modify in place.
ca_counties = county_pop[county_pop['state'] == '06'].copy()

In [None]:
# Drop the annotation / margin-of-error columns, then rename 'universe' to 'population'.
# Both calls modify `ca_counties` in place.
ca_counties.drop(
    columns=[
        'universe_annotation',
        'universe_moe',
        'universe_moe_annotation',
    ],
    inplace=True,
)
ca_counties.rename(
    columns={'universe': 'population'},
    inplace=True,
)

In [None]:
# Overwrite 'county' with the bare county name by removing the ' County, California'
# suffix from 'name'; 'county' is the key used for the merge with the utilization totals.
ca_counties['county'] = ca_counties['name'].str.replace(' County, California', '')

In [None]:
# Preview the cleaned county population table.
ca_counties.head()

In [None]:
# Join the county ICU totals to the county populations on the county name.
# Inner join: counties whose names do not match exactly on both sides are dropped.
icus_county_pop = utilization_counties.merge(
    right=ca_counties,
    how='inner',
    on='county',
)

In [None]:
# County names used for the Bay Area subset.
bayarea = [
    'Alameda',
    'Marin',
    'Contra Costa',
    'San Mateo',
    'Santa Clara',
    'San Francisco',
]

In [None]:
# Rows for the Bay Area counties, followed by their combined population.
in_bayarea = icus_county_pop['county'].isin(bayarea)
bayarea_counties = icus_county_pop[in_bayarea]
bayarea_counties['population'].sum()

In [None]:
# Show up to six rows — one per county in the `bayarea` list if all of them matched.
bayarea_counties.head(6)

In [None]:
# Preview the merged county ICU / population table.
icus_county_pop.head()

In [None]:
# ICU beds per 100,000 residents for each county.
# NOTE(review): the column is named 'icus_per1000' but the multiplier is 100000, so the
# values are per 100,000. The name is left unchanged because later cells and the
# exported files reference it.
icus_county_pop['icus_per1000'] = (icus_county_pop['icu_beds'] * 100000) / icus_county_pop['population']

In [None]:
# Counties with the highest ICU bed rate (values are per 100,000 residents despite the column name).
icus_county_pop.sort_values(by='icus_per1000', ascending=False).head()

In [None]:
# Keep only the county name, the bed count and the bed rate.
icus_county_pop_slim = icus_county_pop.loc[:, ['county', 'icu_beds', 'icus_per1000']]

In [None]:
# Export the slim county table. index=False is not passed, so the DataFrame index
# is written as the first column.
icus_county_pop_slim.to_csv('output/icus_county_pop_slim.csv')

---

In [None]:
# Load the counties shapefile.
# NOTE(review): this is an absolute path under one user's home directory — the cell
# will not run on another machine until the path is changed.
counties_geo = gpd.read_file('/Users/mhustiles/data/data/GIS/LA/counties/2012/counties.shp')

In [None]:
# Rename 'fips' to 'geoid' in place; 'geoid' is the key used later to merge with the
# county ICU/population table.
counties_geo.rename(columns={"fips": "geoid"}, inplace=True)

In [None]:
# Reproject the county geometries to EPSG:4326 (longitude/latitude).
# The EPSG code is passed directly rather than as {'init': 'epsg:4326'}: the 'init'
# dictionary syntax is deprecated by pyproj and triggers warnings in current geopandas.
counties_geo = counties_geo.to_crs(epsg=4326)

In [None]:
# Normalize the headers to lower_snake_case: trim whitespace, lowercase, turn spaces
# and hyphens into underscores, and drop parentheses.
# regex=False makes every replacement a literal match, so '(' and ')' are never
# interpreted as regex metacharacters and the result does not depend on the
# pandas version's default for `regex`.
counties_geo.columns = (
    counties_geo.columns.str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
)

In [None]:
# Keep only the join key, the county name and the geometry.
counties_geo_slim = counties_geo[[ 'geoid', 'name', 'geometry']]

In [None]:
# Attach the county ICU/population figures to the county geometries by matching on
# 'geoid' (default inner join, so counties without a match are dropped).
# Both inputs carry a 'name' column, so the result contains 'name_x' and 'name_y'.
icus_geo = counties_geo_slim.merge(icus_county_pop, on='geoid')

In [None]:
# Write the county-level ICU layer to GeoJSON (the input named in the tippecanoe command below).
icus_geo.to_file('output/icus_geo.geojson', driver='GeoJSON')

In [None]:
# !tippecanoe --generate-ids --force -r1 -pk -pf -o \
# output/icus_geo.mbtiles \
# output/icus_geo.geojson