# Analyzing HHS hospital occupancy data

#### An analysis of HHS hospital data, by @datagraphics and @stiles.

#### **Questions?** [matt.stiles@latimes.com](matt.stiles@latimes.com) \\ 310.529.8749

---

### Import Python tools

In [1]:
import pandas as pd
import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt
%matplotlib inline
import json
import numpy as np
import altair as alt
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---

### Define cities and regions metadata

In [2]:
metadata_df = pd.read_csv("input/city-metadata.csv", dtype={"fips": str})

In [3]:
metadata_df = metadata_df[["county", "fips", "population", "region"]]

In [4]:
socal = ['Los Angeles', 'Orange', 'Ventura', 'San Bernardino', 'Riverside']
bayarea = ['Alameda', 'Contra Costa', 'Marin', 'Napa', 'San Francisco', 'San Mateo', 'Santa Clara', 'Solano', 'Sonoma']

### Get the latest url from the HHS API and read in the latest dataframe

In [5]:
# Data dictionary: https://healthdata.gov/covid-19-reported-patient-impact-and-hospital-capacity-facility-data-dictionary
# Data source: https://healthdata.gov/dataset/covid-19-reported-patient-impact-and-hospital-capacity-facility

In [6]:
metadata = pd.read_json('https://healthdata.gov/api/3/action/package_show?id=d475cc4e-83cd-4c16-be57-9105f300e0bc&page=0').result[0]['resources']

In [7]:
src = pd.read_csv(metadata[0]['url'],\
                 dtype={'fips_code':str, 'zip':str}, infer_datetime_format=True, parse_dates=True)

In [8]:
# src = pd.read_csv('https://healthdata.gov/sites/default/files/reported_hospital_capacity_admissions_facility_level_weekly_average_timeseries_20201228.csv',\
# dtype={'fips_code':str, 'zip':str}, infer_datetime_format=True, parse_dates=True)

In [104]:
df = src.copy()

In [105]:
ca_data = pd.DataFrame(df.loc[df.state == "CA"])

In [106]:
ca_data["fips"] = ca_data["fips_code"].str[2:5]

In [107]:
ca_timeseries = pd.merge(
    ca_data, metadata_df, how="left", left_on="fips", right_on="fips"
)

### What's the most recent collection week?

In [13]:
ca_timeseries['collection_week'] = pd.to_datetime(ca_timeseries['collection_week'])

In [14]:
ca_timeseries['collection_week'].max()

Timestamp('2020-12-25 00:00:00')

### Get rid of the -999999 suppressed values

In [15]:
ca_timeseries = ca_timeseries.replace([-999999.0], [0])

In [16]:
ca_timeseries['hospital_name'] = (ca_timeseries['hospital_name']).str.title()
ca_timeseries['hospital_name'] = (ca_timeseries['hospital_name']).str.replace(' Of ', ' of ').str.replace('Hlth', 'Health').str.replace(' La', ' LA')

---

## Calculate some hospitalization/covid rates

#### Calculate daily average of total staffed ICU beds

In [17]:
ca_timeseries["total_staffed_adult_icu_beds"] = round((
    ca_timeseries["total_staffed_adult_icu_beds_7_day_sum"]
    / ca_timeseries["total_staffed_adult_icu_beds_7_day_coverage"]
),0)

#### Calculate daily average of occupied ICU beds

In [18]:
ca_timeseries["total_occupied_adult_icu_beds"] = round((
    ca_timeseries["staffed_adult_icu_bed_occupancy_7_day_sum"]
    / ca_timeseries["staffed_adult_icu_bed_occupancy_7_day_coverage"]
),0)

#### Calculate daily average of COVID patients in the ICU

In [19]:
ca_timeseries["total_covid_icu_patients"] = round((
    ca_timeseries["staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum"]
    / ca_timeseries[
        "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_coverage"
    ]
),0)

#### Number of ICU beds available

In [20]:
ca_timeseries["total_available_adult_icu_beds"] = round((
    ca_timeseries["total_staffed_adult_icu_beds"]
    - ca_timeseries["total_occupied_adult_icu_beds"]
),0)

#### Calculate daily ICU occupancy as percentage

In [21]:
ca_timeseries["pct_occupied_adult_icu_beds"] = round((
    ca_timeseries["total_occupied_adult_icu_beds"]
    / ca_timeseries["total_staffed_adult_icu_beds"]
),2)

#### If a hospital reports 0 staffed adult ICU beds, drop them from the dataframe

In [22]:
filtered_timeseries = ca_timeseries[
    (ca_timeseries.total_staffed_adult_icu_beds_7_day_sum.notnull())
    & (ca_timeseries.total_staffed_adult_icu_beds_7_day_sum != 0)
].sort_values("total_staffed_adult_icu_beds_7_day_sum")

#### How many patients?

In [23]:
ca_timeseries["all_patients"] = round((
    ca_timeseries["inpatient_beds_used_7_day_sum"]
    / ca_timeseries["inpatient_beds_used_7_day_coverage"]
),0)

#### How many Covid patients

In [24]:
ca_timeseries["total_adult_covid_patients"] = (
    ca_timeseries[
        "total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum"
    ]
    / ca_timeseries[
        "total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage"
    ]
)

In [25]:
ca_timeseries["total_pediatric_covid_patients"] = (
    ca_timeseries[
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum"
    ]
    / ca_timeseries[
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage"
    ]
)

In [26]:
ca_timeseries["total_covid_patients"] = round(ca_timeseries[
    "total_adult_covid_patients"
] + ca_timeseries["total_pediatric_covid_patients"],0).fillna(0)

#### What's the rate of Covid patients?

In [27]:
ca_timeseries["covid_patients_share"] = round((
    ca_timeseries["total_covid_patients"] / ca_timeseries["all_patients"]
),2)

In [28]:
ca_timeseries = ca_timeseries[ca_timeseries['all_patients'] > 0]

---

### Trim to the columns we want

In [109]:
trimmed_timeseries = ca_timeseries[
    [
        "hospital_name",
        "hospital_subtype",
        "ccn",
        "collection_week",
        "county",
        "fips",
        "total_covid_icu_patients",
        "total_available_adult_icu_beds",
        "pct_occupied_adult_icu_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
        "region"
    ]
].rename(
    columns={
        "hospital_name": "hospital",
        "collection_week": "week",
    }
)

KeyError: "['all_patients', 'covid_patients_share', 'total_covid_icu_patients', 'pct_occupied_adult_icu_beds', 'total_covid_patients', 'total_available_adult_icu_beds'] not in index"

---

### Filter the dataframe for Los Angeles County facilities

In [30]:
la = trimmed_timeseries[trimmed_timeseries['fips'] == '037']
oc = trimmed_timeseries[trimmed_timeseries['fips'] == '059']

### Filter the California dataframe to the most recent collection week

In [31]:
current_ca = trimmed_timeseries[trimmed_timeseries['week'] == trimmed_timeseries['week'].max()].sort_values('covid_patients_share', ascending=False)

In [32]:
current_la = la[la['week'] == la['week'].max()].sort_values('covid_patients_share', ascending=False)

In [33]:
current_oc = oc[oc['week'] == oc['week'].max()].sort_values('covid_patients_share', ascending=False)

In [102]:
current_ca.to_csv('output/current_ca.csv', index=False)

### Which CA hospitals have the greatest share of covid patients? 

In [34]:
current_ca.sort_values('covid_patients_share', ascending=False).head(10)

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share
237,Barstow Community Hospital,Short Term,50298.0,2020-12-25,San Bernardino,71,9.0,-1.0,1.11,46.0,44.0,1.05
104,Community Hospital of Huntington Park,Short Term,50091.0,2020-12-25,Los Angeles,37,9.0,0.0,1.0,45.0,49.0,0.92
110,Kaiser Foundation Hospital - Downey,Short Term,50139.0,2020-12-25,Los Angeles,37,43.0,1.0,0.98,230.0,262.0,0.88
76,Kaiser Foundation Hospital - Baldwin Park,Short Term,50723.0,2020-12-25,Los Angeles,37,26.0,0.0,1.0,161.0,188.0,0.86
229,Desert Valley Hospital,Short Term,50709.0,2020-12-25,San Bernardino,71,22.0,0.0,1.0,101.0,120.0,0.84
348,Santa Paula Hospital,Short Term,,2020-12-25,Ventura,111,1.0,2.0,0.67,15.0,18.0,0.83
212,Kaiser Foundation Hospital-Moreno Valley,Short Term,50765.0,2020-12-25,Riverside,65,15.0,0.0,1.0,74.0,93.0,0.8
162,George L Mee Memorial Hospital,Critical Access Hospitals,51336.0,2020-12-25,Monterey,53,0.0,0.0,,8.0,10.0,0.8
240,Mountains Community Hospital,Critical Access Hospitals,51312.0,2020-12-25,San Bernardino,71,0.0,0.0,,4.0,5.0,0.8
204,"Kaiser Foundation Hospital, Riverside",Short Term,50686.0,2020-12-25,Riverside,65,48.0,0.0,1.0,160.0,203.0,0.79


### Which facilities in LA have the greatest share of covid patients? 

In [35]:
current_la[(current_la['hospital_subtype'] != 'Long Term') & \
          (current_la['total_covid_icu_patients'] > 0)]\
.sort_values('covid_patients_share', ascending=False).head(10)

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share
104,Community Hospital of Huntington Park,Short Term,50091,2020-12-25,Los Angeles,37,9.0,0.0,1.0,45.0,49.0,0.92
110,Kaiser Foundation Hospital - Downey,Short Term,50139,2020-12-25,Los Angeles,37,43.0,1.0,0.98,230.0,262.0,0.88
76,Kaiser Foundation Hospital - Baldwin Park,Short Term,50723,2020-12-25,Los Angeles,37,26.0,0.0,1.0,161.0,188.0,0.86
94,"Martin Luther King, Jr. Community Hospital",Short Term,50779,2020-12-25,Los Angeles,37,14.0,1.0,0.97,164.0,215.0,0.76
142,Emanate Health Foothill Presbyterian Hospital,Short Term,50597,2020-12-25,Los Angeles,37,16.0,0.0,1.0,55.0,75.0,0.73
103,Providence Little Co of Mary Med Ctr San Pedro,Short Term,50078,2020-12-25,Los Angeles,37,11.0,0.0,1.0,62.0,87.0,0.71
138,San Dimas Community Hospital,Short Term,50588,2020-12-25,Los Angeles,37,17.0,0.0,1.0,45.0,64.0,0.7
107,Kaiser Foundation Hospital - Panorama City,Short Term,50137,2020-12-25,Los Angeles,37,32.0,3.0,0.92,131.0,188.0,0.7
83,East Los Angeles Doctors Hospital,Short Term,50641,2020-12-25,Los Angeles,37,7.0,2.0,0.8,29.0,42.0,0.69
137,Kaiser Foundation Hospital - West LA,Short Term,50561,2020-12-25,Los Angeles,37,31.0,29.0,0.53,115.0,170.0,0.68


### Which facilities in OC have the greatest share of covid patients? 

In [36]:
current_oc[(current_oc['hospital_subtype'] != 'Long Term') & \
          (current_oc['total_covid_icu_patients'] > 0)]\
.sort_values('covid_patients_share', ascending=False).head(10)

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share
191,Garden Grove Hospital & Medical Center,Short Term,50230,2020-12-25,Orange,59,10.0,0.0,1.0,47.0,63.0,0.75
186,Huntington Beach Hospital,Short Term,50526,2020-12-25,Orange,59,11.0,0.0,1.0,52.0,72.0,0.72
190,Ahmc Anaheim Regional Medical Center,Short Term,50226,2020-12-25,Orange,59,16.0,0.0,1.0,96.0,140.0,0.69
170,Kaiser Foundation Hospital - Orange County - A...,Short Term,50609,2020-12-25,Orange,59,60.0,0.0,1.0,225.0,337.0,0.67
177,Providence St. Joseph Hospital,Short Term,50069,2020-12-25,Orange,59,27.0,0.0,1.0,225.0,344.0,0.65
180,Providence St. Jude Medical Center,Short Term,50168,2020-12-25,Orange,59,26.0,2.0,0.96,192.0,298.0,0.64
168,Fountain Valley Regional Hospital & Medical Ce...,Short Term,50570,2020-12-25,Orange,59,27.0,0.0,1.0,175.0,280.0,0.62
185,Placentia Linda Hospital,Short Term,50589,2020-12-25,Orange,59,4.0,5.0,0.58,41.0,70.0,0.59
193,Los Alamitos Medical Center,Short Term,50551,2020-12-25,Orange,59,15.0,0.0,1.0,95.0,169.0,0.56
181,Memorialcare Orange Coast Medical Center,Short Term,50678,2020-12-25,Orange,59,28.0,2.0,0.94,91.0,168.0,0.54


In [37]:
current_la['covid_patients_share'] = round((current_la['covid_patients_share']*100),2)

### Output top LA hospitals for CMS table

In [38]:
current_la[(current_la['hospital_subtype'] != 'Long Term') & \
          (current_la['total_covid_icu_patients'] > 0)][['hospital', 'total_covid_patients', 'covid_patients_share']]\
.sort_values('covid_patients_share', ascending=False).head(10).to_csv('output/current_la_hospitals.csv', index=False)

### Which facilities in LA have the highest ICU capacity rates? 

In [39]:
current_la[(current_la['total_covid_icu_patients'] > 0)]\
.sort_values('pct_occupied_adult_icu_beds', ascending=False).head(10)

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share
60,Cedars-Sinai Medical Center,Short Term,50625,2020-12-25,Los Angeles,37,75.0,-26.0,1.24,315.0,830.0,38.0
104,Community Hospital of Huntington Park,Short Term,50091,2020-12-25,Los Angeles,37,9.0,0.0,1.0,45.0,49.0,92.0
72,Encino Hospital Medical Center,Short Term,50158,2020-12-25,Los Angeles,37,2.0,0.0,1.0,20.0,71.0,28.0
129,Whittier Hospital Medical Center,Short Term,50735,2020-12-25,Los Angeles,37,16.0,0.0,1.0,70.0,129.0,54.0
62,Centinela Hospital Medical Center,Short Term,50739,2020-12-25,Los Angeles,37,52.0,0.0,1.0,134.0,247.0,54.0
97,Kindred Hospital South Bay,Long Term,52050,2020-12-25,Los Angeles,37,3.0,0.0,1.0,24.0,55.0,44.0
100,Usc Verdugo Hills Hospital,Short Term,50124,2020-12-25,Los Angeles,37,10.0,0.0,1.0,71.0,123.0,58.0
65,Coast Plaza Hospital,Short Term,50771,2020-12-25,Los Angeles,37,5.0,0.0,1.0,28.0,48.0,58.0
81,Providence Little Company of Mary Med Ctr Torr...,Short Term,50353,2020-12-25,Los Angeles,37,23.0,0.0,1.0,159.0,267.0,60.0
90,Memorial Hospital of Gardena,Short Term,50468,2020-12-25,Los Angeles,37,20.0,0.0,1.0,67.0,169.0,40.0


### How many LA County hospitals (with Covid patients) are in our dataframe? 

In [40]:
len(current_la[(current_la['total_covid_icu_patients'] > 0)])

74

### How many have 'high' ICU occupancy rates? 

In [41]:
len(current_la[current_la['pct_occupied_adult_icu_beds'] > .90 ])

47

### Top 10 Bay Area hospotals by Covid share? 

In [42]:
current_ca[current_ca['county'].isin(bayarea)].sort_values('covid_patients_share', ascending=False).head(10)

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share
321,Healdsburg District Hospital,Critical Access Hospitals,51321,2020-12-25,Sonoma,97,0.0,5.0,0.0,6.0,9.0,0.67
302,Kaiser Foundation Hospital-San Jose,Short Term,50604,2020-12-25,Santa Clara,85,15.0,2.0,0.9,88.0,158.0,0.56
290,Kaiser Foundation Hospital - South San Francisco,Short Term,50070,2020-12-25,San Mateo,81,5.0,1.0,0.9,43.0,85.0,0.51
9,Alameda Hospital,Short Term,50211,2020-12-25,Alameda,1,4.0,2.0,0.75,19.0,40.0,0.48
301,Regional Medical Center of San Jose,Short Term,50125,2020-12-25,Santa Clara,85,24.0,0.0,1.0,105.0,236.0,0.44
2,Kaiser Foundation Hospital - Fremont,Short Term,50512,2020-12-25,Alameda,1,10.0,0.0,1.0,38.0,86.0,0.44
12,St Rose Hospital,Short Term,50002,2020-12-25,Alameda,1,5.0,6.0,0.6,26.0,59.0,0.44
0,Kaiser Foundation Hospital - San Leandro,Short Term,50777,2020-12-25,Alameda,1,17.0,0.0,1.0,68.0,169.0,0.4
21,Sutter Delta Medical Center,Short Term,50523,2020-12-25,Contra Costa,13,3.0,1.0,0.92,35.0,90.0,0.39
291,San Mateo Medical Center,Short Term,50113,2020-12-25,San Mateo,81,6.0,1.0,0.86,15.0,38.0,0.39


---

## Get medical facilities' geographic data

In [43]:
# From HHS: https://maps3.arcgisonline.com/arcgis/rest/services/A-16/HHS_IOM_Health_Resources/MapServer

In [44]:
hospitals = gpd.read_file('/Users/mhustiles/data/github/AGStoShapefile/backupdir/HHS_Hospital/HHS_Hospital_1608139617293.geojson')

In [45]:
medical_centers = gpd.read_file('/Users/mhustiles/data/github/AGStoShapefile/backupdir/HHS_Medical_Center/HHS_Medical_Center_1608139616289.geojson')

### Clean up the column names so we can merge the facilities dataframes

In [46]:
facilities = [hospitals, medical_centers]

In [47]:
hospitals.rename(columns={'Name_new':'name', 'Address_1':'address', 'City':'city', 'State_1':'state', 'ZipCode':'zipcode',\
                               'PhoneNum':'phone', 'County_Nam':'county', 'Provider_N':'provider_id', 'Hospital_T':'type', 'Hospital_O':'operation',\
                               'Emergency_':'emergency'}, inplace=True)

In [48]:
medical_centers.rename(columns={'Hospital_N':'name', 'Address1':'address', 'City_1':'city', 'State_1':'state', 'ZipCode':'zipcode',\
                               'PhoneNum':'phone', 'County_Nam':'county', 'Provider_N':'provider_id', 'Hospital_T':'type', 'Hospital_O':'operation',\
                               'Emergency_':'emergency'}, inplace=True)

### Concatenate the different facility types into one dataframe, and also filter that just to CA

In [49]:
all_medical_geo = pd.concat(facilities)

In [50]:
all_medical_geo_ca = all_medical_geo[all_medical_geo['state'] == 'CA']

### We might be better off using HHS' own locations data with CCNs

In [51]:
locations = pd.read_csv('input/hospital_locations.csv', dtype={'latitude':float, 'longitude':float, 'CCN':str})

In [52]:
locations.columns = locations.columns.str.strip().str.lower().str.replace(' ', '_')\
                    .str.replace('(', '').str.replace(')', '').str.replace('-','_')

### Even though it has the wrong location for White Memorial. Let's fix.

In [53]:
locations.loc[(locations.facility_name == 'WHITE MEMORIAL MEDICAL CENTER'),'longitude']=-118.2176219

In [54]:
locations.loc[(locations.facility_name == 'WHITE MEMORIAL MEDICAL CENTER'),'latitude']=34.0493044

### Convert the lon/lat fields into a geodataframe

In [55]:
locations = gpd.GeoDataFrame(
    locations, geometry=gpd.points_from_xy(locations.longitude, locations.latitude))

### And then confine it to California

In [56]:
locationsca = gpd.GeoDataFrame(locations[locations['state'] == 'CA'])

### Make the CCN string match the hospital capacity data

In [57]:
locationsca['ccn'] = locationsca['ccn'].str.zfill(6)

---

### Merge with our CA medical facilities

In [58]:
current_ca_geo = locationsca.merge(current_ca, left_on='ccn', right_on='ccn')

In [59]:
len(current_ca_geo)

345

In [60]:
current_ca_geo.rename(columns={'county_x':'county', 'city_x':'city', 'address_x':'address', 'state_x':'state'}, inplace=True)

In [61]:
# current_ca_geo.drop(['county_y', 'hospital_subtype', 'provider_id', 'ccn', 'emergency', 'address', 'OBJECTID', 'state', 'name', 'phone', 'zipcode'], axis=1, inplace=True)

---

### Filter to just LA County hospitals

In [62]:
current_la_geo = current_ca_geo[current_ca_geo['fips'] == '037']

### How many facilities in California?

In [63]:
len(current_ca_geo)

345

### How many in L.A. County?

In [64]:
len(current_la_geo)

84

---

### Export hospital specific data for maps and tables

In [65]:
current_ca_geo.to_file('output/current_ca_geo.geojson', driver='GeoJSON')

In [66]:
current_ca_geo.to_csv('output/current_ca_geo.csv', index=False)

In [67]:
current_la_geo.to_file('output/current_la_geo.geojson', driver='GeoJSON')

In [68]:
current_la_geo.to_csv('output/current_la_geo.csv', index=False)

---

## Aggregate

### What's the average share of Covid patients in hospitals statewide?

In [69]:
state = current_ca.groupby(['week']).agg({'total_covid_patients':'sum', 'all_patients':'sum'}).reset_index()

In [70]:
state['covid_patients_share'] = round((
    state["total_covid_patients"]
    / state["all_patients"]
),2)

In [71]:
state.head()

Unnamed: 0,week,total_covid_patients,all_patients,covid_patients_share
0,2020-12-25,20642.0,49688.0,0.42


### Covid patients by county

In [72]:
counties = trimmed_timeseries.groupby(['county', 'week']).agg({'total_covid_patients':'sum', 'all_patients':'sum'}).reset_index()

In [73]:
counties['covid_patients_share'] = round((
    counties["total_covid_patients"]
    / counties["all_patients"]
),2)

#### Which counties have the most Covid patients (in most recent week)?

In [74]:
counties[counties['week'] == counties['week'].max()].sort_values('total_covid_patients', ascending=False).head(5)

Unnamed: 0,county,week,total_covid_patients,all_patients,covid_patients_share
391,Los Angeles,2020-12-25,7584.0,15201.0,0.5
633,Orange,2020-12-25,2048.0,4387.0,0.47
764,San Bernardino,2020-12-25,1725.0,3160.0,0.55
699,Riverside,2020-12-25,1525.0,2760.0,0.55
786,San Diego,2020-12-25,1516.0,4192.0,0.36


#### Which counties have the highest percentage of Covid patients (in most recent week)?

In [75]:
counties[counties['week'] == counties['week'].max()].sort_values('total_covid_patients', ascending=False).head(5)

Unnamed: 0,county,week,total_covid_patients,all_patients,covid_patients_share
391,Los Angeles,2020-12-25,7584.0,15201.0,0.5
633,Orange,2020-12-25,2048.0,4387.0,0.47
764,San Bernardino,2020-12-25,1725.0,3160.0,0.55
699,Riverside,2020-12-25,1525.0,2760.0,0.55
786,San Diego,2020-12-25,1516.0,4192.0,0.36


### How has this changed over time? 

In [76]:
counties_grouped = counties.groupby(['county', 'week']).agg({'total_covid_patients':'sum', 'covid_patients_share':'sum'}).reset_index()

In [77]:
la_grouped = pd.DataFrame(counties_grouped[counties_grouped['county'] == 'Los Angeles'])

In [78]:
la_grouped.tail()

Unnamed: 0,county,week,total_covid_patients,covid_patients_share
387,Los Angeles,2020-11-27,2743.0,0.21
388,Los Angeles,2020-12-04,3666.0,0.26
389,Los Angeles,2020-12-11,5038.0,0.34
390,Los Angeles,2020-12-18,6497.0,0.43
391,Los Angeles,2020-12-25,7584.0,0.5


In [79]:
la_grouped.to_csv('output/la_grouped.csv', index=False)

In [80]:
alt.Chart(la_grouped).mark_area().encode(
    x=alt.X('week:T', axis=alt.Axis(format='', title=' ', tickCount=3)),
    y = alt.Y('covid_patients_share', title='Covid patients in LA County hospitals', axis=alt.Axis(format='%', tickCount=6)),
).properties(width=500, height=500)

### Just southern California counties? 

In [81]:
socal_grouped = counties_grouped[counties_grouped['county'].isin(socal)].groupby(['county','week']).agg({'total_covid_patients':'sum', 'covid_patients_share':'mean'}).reset_index()

In [83]:
alt.Chart(socal_grouped).mark_area().encode(
    x=alt.X('week:T', axis=alt.Axis(format='', title=' ', tickCount=3)),
    y = alt.Y('covid_patients_share', title=' ', axis=alt.Axis(format='%', title='', tickCount=6)),
    facet=alt.Facet('county')
).properties(width=200, height=200, title='Share of all patients with Covid in SoCal Counties')

### Bay Area? 

In [84]:
bayarea_grouped = counties_grouped[counties_grouped['county'].isin(bayarea)].groupby(['week']).agg({'total_covid_patients':'sum', 'covid_patients_share':'mean'}).reset_index()

In [85]:
bayarea_grouped.tail()

Unnamed: 0,week,total_covid_patients,covid_patients_share
17,2020-11-27,939.0,0.121111
18,2020-12-04,1183.0,0.146667
19,2020-12-11,1515.0,0.182222
20,2020-12-18,1806.0,0.21
21,2020-12-25,2002.0,0.235556


In [86]:
alt.Chart(bayarea_grouped).mark_area().encode(
    x=alt.X('week:T', axis=alt.Axis(format='', title=' ', tickCount=3)),
    y = alt.Y('covid_patients_share', title=' ', axis=alt.Axis(format='%', title='Share of all patients with Covid in Bay Area', tickCount=6)),
).properties(width=500, height=500)

---

### What's going on in the most-populous counties 

In [87]:
counties_pop = pd.read_csv('../census/processed/acs5_2018_population_counties.csv', dtype={'geoid':str, 'population':int, 'state':str, 'county':str})

In [88]:
counties_pop.rename(columns={'universe':'population'}, inplace=True)

In [89]:
counties_pop.drop(['universe_annotation', 'universe_moe', 'universe_moe_annotation'], axis=1, inplace=True)

In [90]:
ca_counties_pop = counties_pop[counties_pop['state'] == '06']

In [91]:
big_ca_counties_pop = ca_counties_pop.sort_values('population', ascending=False).head(20)

In [92]:
big_ca_counties_pop['name'] = big_ca_counties_pop['name'].str.replace(' County, California', '')

In [93]:
big_counties = list(big_ca_counties_pop.name)

In [94]:
big_counties

['Los Angeles',
 'San Diego',
 'Orange',
 'Riverside',
 'San Bernardino',
 'Santa Clara',
 'Alameda',
 'Sacramento',
 'Contra Costa',
 'Fresno',
 'Kern',
 'San Francisco',
 'Ventura',
 'San Mateo',
 'San Joaquin',
 'Stanislaus',
 'Sonoma',
 'Tulare',
 'Santa Barbara',
 'Solano']

In [95]:
big_counties_grouped = counties_grouped[counties_grouped['county'].isin(big_counties)].groupby(['county', 'week']).agg({'total_covid_patients':'sum', 'covid_patients_share':'mean'}).reset_index()

In [96]:
alt.Chart(big_counties_grouped).mark_area().encode(
    x=alt.X('week:T', axis=alt.Axis(grid=False, format='', title=' ', tickCount=3)),
    y = alt.Y('covid_patients_share', title=' ', axis=alt.Axis(format='%', title='', tickCount=3)),
    facet=alt.Facet('county', columns=5)
).properties(width=150, height=150, title='Share of all patients with Covid in large Counties')

---

### Hospital overcapacity

In [97]:
before = pd.read_csv('input/hospital_utilization.csv', dtype={'fac_no':str})

In [98]:
before[before['fac_name'].str.contains('KING')]

Unnamed: 0,fac_no,fac_name,license_no,icu_beds,tot_lic_beds
185,106191230,"MARTIN LUTHER KING, JR. COMMUNITY HOSPITAL",60000132,20,131


In [99]:
current_ca[current_ca['ccn'] == '050779']

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share
94,"Martin Luther King, Jr. Community Hospital",Short Term,50779,2020-12-25,Los Angeles,37,14.0,1.0,0.97,164.0,215.0,0.76


In [100]:
before[['fac_name']].sort_values('fac_name', ascending=False).to_csv('output/before_names.csv', index=False)

In [101]:
current_ca[['hospital']].sort_values('hospital', ascending=False).to_csv('output/current_names.csv', index=False)