# Analyzing HHS hospital occupancy data

#### An analysis of HHS hospital data, by @datagraphics and @stiles.

#### **Questions?** [matt.stiles@latimes.com](matt.stiles@latimes.com) \\ 310.529.8749

---

### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt

%matplotlib inline
import json
import numpy as np
import altair as alt
import altair_latimes as lat

alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---

### Define cities and regions metadata

In [3]:
metadata_df = pd.read_csv("input/city-metadata.csv", dtype={"fips": str})

In [4]:
metadata_df = metadata_df[["county", "fips", "population", "region"]]

In [5]:
socal = ["Los Angeles", "Orange", "Ventura", "San Bernardino", "Riverside"]
bayarea = [
    "Alameda",
    "Contra Costa",
    "Marin",
    "Napa",
    "San Francisco",
    "San Mateo",
    "Santa Clara",
    "Solano",
    "Sonoma",
]

### Get the latest url from the HHS API and read in the latest dataframe

In [6]:
# Data dictionary: https://healthdata.gov/covid-19-reported-patient-impact-and-hospital-capacity-facility-data-dictionary
# Data source: https://healthdata.gov/dataset/covid-19-reported-patient-impact-and-hospital-capacity-facility

In [7]:
metadata = pd.read_json(
    "https://healthdata.gov/api/3/action/package_show?id=d475cc4e-83cd-4c16-be57-9105f300e0bc&page=0"
).result[0]["resources"]

In [8]:
src = pd.read_csv(
    metadata[0]["url"],
    dtype={"fips_code": str, "zip": str},
    infer_datetime_format=True,
    parse_dates=True,
)

In [9]:
# src = pd.read_csv('https://healthdata.gov/sites/default/files/reported_hospital_capacity_admissions_facility_level_weekly_average_timeseries_20201228.csv',\
# dtype={'fips_code':str, 'zip':str}, infer_datetime_format=True, parse_dates=True)

In [10]:
df = src.copy()

In [11]:
ca_data = pd.DataFrame(df.loc[df.state == "CA"])

In [12]:
ca_data["fips"] = ca_data["fips_code"].str[2:5]

In [13]:
ca_timeseries = pd.merge(
    ca_data, metadata_df, how="left", left_on="fips", right_on="fips"
)

### What's the most recent collection week?

In [14]:
ca_timeseries["collection_week"] = pd.to_datetime(ca_timeseries["collection_week"])

In [15]:
ca_timeseries["collection_week"].max()

Timestamp('2021-01-29 00:00:00')

### Get rid of the -999999 suppressed values

In [16]:
ca_timeseries = ca_timeseries.replace([-999999.0], [0])

In [17]:
ca_timeseries["hospital_name"] = (ca_timeseries["hospital_name"]).str.title()
ca_timeseries["hospital_name"] = (
    (ca_timeseries["hospital_name"])
    .str.replace(" Of ", " of ")
    .str.replace("Hlth", "Health")
    .str.replace(" La", " LA")
)

---

## Calculate some hospitalization/covid rates

In [18]:
ca_timeseries.head()

Unnamed: 0,hospital_pk,collection_week,state,ccn,hospital_name,address,city,zip,hospital_subtype,fips_code,is_metro_micro,total_beds_7_day_avg,all_adult_hospital_beds_7_day_avg,all_adult_hospital_inpatient_beds_7_day_avg,inpatient_beds_used_7_day_avg,all_adult_hospital_inpatient_bed_occupied_7_day_avg,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg,total_adult_patients_hospitalized_confirmed_covid_7_day_avg,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg,total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg,inpatient_beds_7_day_avg,total_icu_beds_7_day_avg,total_staffed_adult_icu_beds_7_day_avg,icu_beds_used_7_day_avg,staffed_adult_icu_bed_occupancy_7_day_avg,...,previous_day_admission_adult_covid_confirmed_40-49_7_day_sum,previous_day_admission_adult_covid_confirmed_50-59_7_day_sum,previous_day_admission_adult_covid_confirmed_60-69_7_day_sum,previous_day_admission_adult_covid_confirmed_70-79_7_day_sum,previous_day_admission_adult_covid_confirmed_80+_7_day_sum,previous_day_admission_adult_covid_confirmed_unknown_7_day_sum,previous_day_admission_pediatric_covid_confirmed_7_day_sum,previous_day_covid_ED_visits_7_day_sum,previous_day_admission_adult_covid_suspected_7_day_sum,previous_day_admission_adult_covid_suspected_18-19_7_day_sum,previous_day_admission_adult_covid_suspected_20-29_7_day_sum,previous_day_admission_adult_covid_suspected_30-39_7_day_sum,previous_day_admission_adult_covid_suspected_40-49_7_day_sum,previous_day_admission_adult_covid_suspected_50-59_7_day_sum,previous_day_admission_adult_covid_suspected_60-69_7_day_sum,previous_day_admission_adult_covid_suspected_70-79_7_day_sum,previous_day_admission_adult_covid_suspected_80+_7_day_sum,previous_day_admission_adult_covid_suspected_unknown_7_day_sum,previous_day_admission_pediatric_covid_suspected_7_day_sum,previous_day_total_ED_visits_7_day_sum,previous_day_admission_influenza_confirmed_7_day_sum,fips,county,population,region
0,50512,2021-01-29,CA,50512,Kaiser Foundation Hospital - Fremont,39400 PASEO PADRE PKWY,FREMONT,94538,Short Term,6001,True,93.6,93.6,93.6,85.0,85.0,34.1,28.3,0.0,0.0,93.6,11.6,11.6,11.4,11.4,...,0.0,4.0,4.0,0.0,4.0,0.0,0.0,50.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,549.0,0.0,1,Alameda,1643700,Bay Area
1,50075,2021-01-29,CA,50075,Kaiser Foundation Hospital - Oakland/Richmond,275 WEST MACARTHUR BOULEVARD,OAKLAND,94611,Short Term,6001,True,269.9,237.4,237.4,248.3,218.9,63.7,51.9,0.0,0.0,269.9,47.4,30.9,44.9,29.7,...,0.0,11.0,10.0,4.0,4.0,0.0,0.0,110.0,22.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,11.0,0.0,0.0,945.0,0.0,1,Alameda,1643700,Bay Area
2,50195,2021-01-29,CA,50195,Washington Hospital,2000 MOWRY AVE,FREMONT,94538,Short Term,6001,True,229.7,216.7,194.7,195.9,194.7,59.0,50.3,0.0,0.0,207.7,38.4,29.4,30.6,29.4,...,0.0,0.0,5.0,0.0,14.0,0.0,0.0,146.0,18.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,10.0,0.0,0.0,780.0,0.0,1,Alameda,1643700,Bay Area
3,50777,2021-01-29,CA,50777,Kaiser Foundation Hospital - San Leandro,2500 MERCED STREET,SAN LEANDRO,94577,Short Term,6001,True,182.0,171.7,171.7,165.3,156.0,41.9,36.6,0.0,0.0,182.0,29.4,19.4,27.1,18.1,...,6.0,10.0,7.0,8.0,5.0,0.0,0.0,111.0,16.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,1018.0,0.0,1,Alameda,1643700,Bay Area
4,50002,2021-01-29,CA,50002,St Rose Hospital,27200 CALAROGA AVE,HAYWARD,94545,Short Term,6001,True,95.0,95.0,95.0,58.3,58.3,24.3,24.3,0.0,0.0,95.0,14.0,14.0,11.6,11.6,...,0.0,0.0,4.0,4.0,0.0,0.0,0.0,62.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,382.0,0.0,1,Alameda,1643700,Bay Area


#### How many beds

In [19]:
ca_timeseries["total_beds"] = round(
    (
        ca_timeseries["total_beds_7_day_sum"]
        / ca_timeseries["total_beds_7_day_coverage"]
    ),
    0,
)

#### Calculate daily average of total staffed ICU beds

In [20]:
ca_timeseries["total_staffed_adult_icu_beds"] = round(
    (
        ca_timeseries["total_staffed_adult_icu_beds_7_day_sum"]
        / ca_timeseries["total_staffed_adult_icu_beds_7_day_coverage"]
    ),
    0,
)

#### Calculate daily average of occupied ICU beds

In [21]:
ca_timeseries["total_occupied_adult_icu_beds"] = round(
    (
        ca_timeseries["staffed_adult_icu_bed_occupancy_7_day_sum"]
        / ca_timeseries["staffed_adult_icu_bed_occupancy_7_day_coverage"]
    ),
    0,
)

#### Calculate daily average of COVID patients in the ICU

In [22]:
ca_timeseries["total_covid_icu_patients"] = round(
    (
        ca_timeseries[
            "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum"
        ]
        / ca_timeseries[
            "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_coverage"
        ]
    ),
    0,
)

#### Number of ICU beds available

In [23]:
ca_timeseries["total_available_adult_icu_beds"] = round(
    (
        ca_timeseries["total_staffed_adult_icu_beds"]
        - ca_timeseries["total_occupied_adult_icu_beds"]
    ),
    0,
)

#### Calculate daily ICU occupancy as percentage

In [24]:
ca_timeseries["pct_occupied_adult_icu_beds"] = round(
    (
        ca_timeseries["total_occupied_adult_icu_beds"]
        / ca_timeseries["total_staffed_adult_icu_beds"]
    ),
    2,
)

#### If a hospital reports 0 staffed adult ICU beds, drop them from the dataframe

In [25]:
filtered_timeseries = ca_timeseries[
    (ca_timeseries.total_staffed_adult_icu_beds_7_day_sum.notnull())
    & (ca_timeseries.total_staffed_adult_icu_beds_7_day_sum != 0)
].sort_values("total_staffed_adult_icu_beds_7_day_sum")

#### How many patients?

In [26]:
ca_timeseries["all_patients"] = round(
    (
        ca_timeseries["inpatient_beds_used_7_day_sum"]
        / ca_timeseries["inpatient_beds_used_7_day_coverage"]
    ),
    0,
)

#### How many Covid patients

In [27]:
ca_timeseries["total_adult_covid_patients"] = (
    ca_timeseries[
        "total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum"
    ]
    / ca_timeseries[
        "total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage"
    ]
)

In [28]:
ca_timeseries["total_pediatric_covid_patients"] = (
    ca_timeseries[
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum"
    ]
    / ca_timeseries[
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage"
    ]
)

In [29]:
ca_timeseries["total_covid_patients"] = round(
    ca_timeseries["total_adult_covid_patients"]
    + ca_timeseries["total_pediatric_covid_patients"],
    0,
).fillna(0)

#### What's the rate of Covid patients?

In [30]:
ca_timeseries["covid_patients_share"] = round(
    (ca_timeseries["total_covid_patients"] / ca_timeseries["all_patients"]), 2
)

In [31]:
ca_timeseries = ca_timeseries[ca_timeseries["all_patients"] > 0]

---

### Trim to the columns we want

In [32]:
trimmed_timeseries = ca_timeseries[
    [
        "hospital_name",
        "hospital_subtype",
        "ccn",
        "collection_week",
        "county",
        "fips",
        "total_beds",
        "total_staffed_adult_icu_beds",
        "total_covid_icu_patients",
        "total_available_adult_icu_beds",
        "pct_occupied_adult_icu_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
        "region",
    ]
].rename(
    columns={
        "hospital_name": "hospital",
        "collection_week": "week",
    }
)

In [33]:
trimmed_timeseries.covid_patients_share.mean()

0.18411217948717948

In [34]:
trimmed_timeseries.to_csv("output/trimmed_timeseries.csv", index=False)

---

### Filter the dataframe for Los Angeles County facilities

In [35]:
la = trimmed_timeseries[trimmed_timeseries["fips"] == "037"]
oc = trimmed_timeseries[trimmed_timeseries["fips"] == "059"]

### Filter the California dataframe to the most recent collection week

In [36]:
current_ca = trimmed_timeseries[
    trimmed_timeseries["week"] == trimmed_timeseries["week"].max()
].sort_values("covid_patients_share", ascending=False)

In [37]:
current_la = la[la["week"] == la["week"].max()].sort_values(
    "covid_patients_share", ascending=False
)

In [38]:
current_oc = oc[oc["week"] == oc["week"].max()].sort_values(
    "covid_patients_share", ascending=False
)

In [39]:
current_ca.to_csv("output/current_ca.csv", index=False)

### Specific place?

In [40]:
current_ca[current_ca["hospital"].str.contains("Methodist Hospital")].iloc[0]

hospital                          Methodist Hospital of Southern Ca
hospital_subtype                                         Short Term
ccn                                                          050238
week                                            2021-01-29 00:00:00
county                                                  Los Angeles
fips                                                            037
total_beds                                                    271.0
total_staffed_adult_icu_beds                                   28.0
total_covid_icu_patients                                       13.0
total_available_adult_icu_beds                                  1.0
pct_occupied_adult_icu_beds                                    0.96
total_covid_patients                                           74.0
all_patients                                                  211.0
covid_patients_share                                           0.35
region                                          

### Which CA hospitals have the greatest share of covid patients? 

In [41]:
current_ca[
    (current_ca["hospital_subtype"] != "Long Term")
    & (current_ca["total_covid_icu_patients"] > 0)
][
    [
        "hospital",
        "total_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
        "region",
        "county",
    ]
].sort_values(
    "covid_patients_share", ascending=False
).head(
    10
)

Unnamed: 0,hospital,total_beds,total_covid_patients,all_patients,covid_patients_share,region,county
134,Coast Plaza Hospital,72.0,33.0,46.0,0.72,Southern California,Los Angeles
87,Pih Hospital - Downey,162.0,83.0,119.0,0.7,Southern California,Los Angeles
327,Oak Valley Hospital District,24.0,8.0,12.0,0.67,San Joaquin Valley,Stanislaus
228,Chino Valley Medical Center,97.0,56.0,85.0,0.66,Southern California,San Bernardino
216,Kaiser Foundation Hospital-Moreno Valley,120.0,59.0,91.0,0.65,Southern California,Riverside
184,Garden Grove Hospital & Medical Center,85.0,42.0,65.0,0.65,Southern California,Orange
120,East Los Angeles Doctors Hospital,102.0,23.0,36.0,0.64,Southern California,Los Angeles
110,Kaiser Foundation Hospital - Baldwin Park,248.0,113.0,187.0,0.6,Southern California,Los Angeles
119,Kaiser Foundation Hospital - Downey,385.0,142.0,240.0,0.59,Southern California,Los Angeles
258,Palomar Health Downtown Campus,294.0,127.0,219.0,0.58,Southern California,San Diego


### Which facilities in LA have the greatest share of covid patients? 

In [42]:
current_la[
    (current_la["hospital_subtype"] != "Long Term")
    & (current_la["total_covid_icu_patients"] > 0)
][
    [
        "hospital",
        "total_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
    ]
].sort_values(
    "covid_patients_share", ascending=False
).head(
    10
)

Unnamed: 0,hospital,total_beds,total_covid_patients,all_patients,covid_patients_share
134,Coast Plaza Hospital,72.0,33.0,46.0,0.72
87,Pih Hospital - Downey,162.0,83.0,119.0,0.7
120,East Los Angeles Doctors Hospital,102.0,23.0,36.0,0.64
110,Kaiser Foundation Hospital - Baldwin Park,248.0,113.0,187.0,0.6
119,Kaiser Foundation Hospital - Downey,385.0,142.0,240.0,0.59
71,Pacifica Hospital of The Valley,142.0,56.0,98.0,0.57
90,San Dimas Community Hospital,162.0,31.0,57.0,0.54
75,Greater El Monte Community Hospital,86.0,26.0,48.0,0.54
86,Palmdale Regional Medical Center,184.0,70.0,133.0,0.53
77,"Martin Luther King, Jr. Community Hospital",260.0,112.0,212.0,0.53


### Which places have the lowest share? 

In [43]:
current_la[
    (current_la["hospital_subtype"] != "Long Term")
    & (current_la["total_covid_icu_patients"] > 0)
][
    [
        "hospital",
        "total_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
    ]
].sort_values(
    "covid_patients_share", ascending=False
).tail(
    10
)

Unnamed: 0,hospital,total_beds,total_covid_patients,all_patients,covid_patients_share
88,Lac/Olive View-Ucla Medical Center,237.0,52.0,204.0,0.25
130,Memorial Hospital of Gardena,204.0,45.0,191.0,0.24
76,Sherman Oaks Hospital,114.0,21.0,92.0,0.23
62,Santa Monica - Ucla Med Ctr & Orthopaedic Hosp...,296.0,50.0,214.0,0.23
108,Cedars-Sinai Medical Center,883.0,166.0,778.0,0.21
66,Ronald Reagan U C L A Medical Center,460.0,80.0,406.0,0.2
121,Encino Hospital Medical Center,68.0,12.0,67.0,0.18
135,College Medical Center,234.0,14.0,141.0,0.1
84,City of Hope Helford Clinical Research Hospital,231.0,20.0,217.0,0.09
91,Usc Kenneth Norris Jr Cancer Hospital,60.0,2.0,30.0,0.07


### Which facilities in OC have the greatest share of covid patients? 

In [44]:
current_oc[
    (current_oc["hospital_subtype"] != "Long Term")
    & (current_oc["total_covid_icu_patients"] > 0)
][
    ["hospital", "total_covid_patients", "all_patients", "covid_patients_share"]
].sort_values(
    "covid_patients_share", ascending=False
).head(
    5
)

Unnamed: 0,hospital,total_covid_patients,all_patients,covid_patients_share
184,Garden Grove Hospital & Medical Center,42.0,65.0,0.65
176,Ahmc Anaheim Regional Medical Center,60.0,115.0,0.52
166,Placentia Linda Hospital,19.0,37.0,0.51
192,Fountain Valley Regional Hospital & Medical Ce...,107.0,222.0,0.48
187,Providence St. Joseph Hospital,150.0,312.0,0.48


In [45]:
current_oc[
    (current_oc["hospital_subtype"] != "Long Term")
    & (current_oc["total_covid_icu_patients"] > 0)
][
    [
        "hospital",
        "total_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
    ]
].sort_values(
    "covid_patients_share", ascending=False
).tail(
    10
)

Unnamed: 0,hospital,total_beds,total_covid_patients,all_patients,covid_patients_share
191,La Palma Intercommunity Hospital,51.0,14.0,40.0,0.35
175,Huntington Beach Hospital,86.0,20.0,59.0,0.34
177,Providence Mission Hospital,310.0,70.0,250.0,0.28
174,Hoag Memorial Hospital Presbyterian,445.0,112.0,430.0,0.26
180,Memorialcare Saddleback Medical Center,239.0,36.0,143.0,0.25
179,South Coast Global Medical Center,178.0,20.0,80.0,0.25
188,Chapman Global Medical Center,114.0,15.0,62.0,0.24
181,University of California Irvine Medical Center,423.0,81.0,356.0,0.23
171,Orange County Global Medical Center,337.0,48.0,227.0,0.21
169,Anaheim Global Medical Center,189.0,17.0,109.0,0.16


In [46]:
current_la["covid_patients_share"] = round(
    (current_la["covid_patients_share"] * 100), 2
)

### Output top LA hospitals for CMS table

In [47]:
current_la[
    (current_la["hospital_subtype"] != "Long Term")
    & (current_la["total_covid_icu_patients"] > 0)
][["hospital", "total_covid_patients", "covid_patients_share"]].sort_values(
    "covid_patients_share", ascending=False
).head(
    10
).to_csv(
    "output/current_la_hospitals.csv", index=False
)

### Which facilities in LA have the highest ICU capacity rates? 

In [48]:
current_la[(current_la["total_covid_icu_patients"] > 0)].sort_values(
    "pct_occupied_adult_icu_beds", ascending=False
).head(10)

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_beds,total_staffed_adult_icu_beds,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share,region
134,Coast Plaza Hospital,Short Term,50771,2021-01-29,Los Angeles,37,72.0,8.0,7.0,0.0,1.0,33.0,46.0,72.0,Southern California
95,"L A Downtown Medical Center, Llc",Short Term,50763,2021-01-29,Los Angeles,37,113.0,12.0,3.0,0.0,1.0,34.0,107.0,32.0,Southern California
92,Kaiser Foundation Hospital - Woodland Hills,Short Term,50677,2021-01-29,Los Angeles,37,226.0,20.0,13.0,0.0,1.0,64.0,144.0,44.0,Southern California
79,Kaiser Foundation Hospital - South Bay,Short Term,50411,2021-01-29,Los Angeles,37,237.0,22.0,13.0,0.0,1.0,72.0,174.0,41.0,Southern California
131,Kaiser Foundation Hospital - West LA,Short Term,50561,2021-01-29,Los Angeles,37,230.0,33.0,24.0,0.0,1.0,65.0,142.0,46.0,Southern California
80,Kindred Hospital South Bay,Long Term,52050,2021-01-29,Los Angeles,37,59.0,9.0,7.0,0.0,1.0,22.0,57.0,39.0,Southern California
116,San Gabriel Valley Medical Center,Short Term,50132,2021-01-29,Los Angeles,37,112.0,18.0,12.0,0.0,1.0,45.0,95.0,47.0,Southern California
114,Centinela Hospital Medical Center,Short Term,50739,2021-01-29,Los Angeles,37,316.0,67.0,35.0,0.0,1.0,93.0,256.0,36.0,Southern California
125,Glendale Adventist Medical Center,Short Term,50239,2021-01-29,Los Angeles,37,302.0,55.0,44.0,0.0,1.0,134.0,277.0,48.0,Southern California
67,Providence Holy Cross Medical Center,Short Term,50278,2021-01-29,Los Angeles,37,335.0,42.0,31.0,0.0,1.0,125.0,294.0,43.0,Southern California


### How many LA County hospitals (with Covid patients) are in our dataframe? 

In [49]:
len(current_la[(current_la["total_covid_icu_patients"] > 0)])

75

### How many have 'high' ICU occupancy rates? 

In [50]:
len(current_la[current_la["pct_occupied_adult_icu_beds"] > 0.90])

45

### Top 10 Bay Area hospotals by Covid share? 

In [51]:
current_ca[current_ca["county"].isin(bayarea)].sort_values(
    "covid_patients_share", ascending=False
).head(10)

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_beds,total_staffed_adult_icu_beds,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share,region
289,Kaiser Foundation Hospital - South San Francisco,Short Term,50070,2021-01-29,San Mateo,81,95.0,12.0,7.0,1.0,0.92,42.0,86.0,0.49,Bay Area
304,Regional Medical Center of San Jose,Short Term,50125,2021-01-29,Santa Clara,85,282.0,52.0,28.0,0.0,1.0,84.0,196.0,0.43,Bay Area
4,St Rose Hospital,Short Term,50002,2021-01-29,Alameda,1,95.0,14.0,6.0,2.0,0.86,24.0,58.0,0.41,Bay Area
0,Kaiser Foundation Hospital - Fremont,Short Term,50512,2021-01-29,Alameda,1,94.0,12.0,8.0,1.0,0.92,34.0,85.0,0.4,Bay Area
25,San Ramon Regional Medical Center,Short Term,50689,2021-01-29,Contra Costa,13,65.0,12.0,4.0,4.0,0.67,24.0,61.0,0.39,Bay Area
291,Seton Medical Center,Short Term,50289,2021-01-29,San Mateo,81,74.0,9.0,4.0,2.0,0.78,24.0,65.0,0.37,Bay Area
299,Kaiser Foundation Hospital-San Jose,Short Term,50604,2021-01-29,Santa Clara,85,149.0,19.0,11.0,2.0,0.89,49.0,135.0,0.36,Bay Area
316,Kaiser Foundation Hospital - Vacaville,Short Term,50767,2021-01-29,Solano,95,115.0,15.0,11.0,0.0,1.0,36.0,105.0,0.34,Bay Area
2,Washington Hospital,Short Term,50195,2021-01-29,Alameda,1,230.0,29.0,15.0,0.0,1.0,59.0,196.0,0.3,Bay Area
317,Kaiser Foundation Hospital And Rehab Center,Short Term,50073,2021-01-29,Solano,95,177.0,16.0,10.0,1.0,0.94,49.0,164.0,0.3,Bay Area


---

## Get medical facilities' geographic data

In [52]:
# From HHS: https://maps3.arcgisonline.com/arcgis/rest/services/A-16/HHS_IOM_Health_Resources/MapServer

In [53]:
hospitals = gpd.read_file(
    "/Users/mhustiles/data/github/AGStoShapefile/backupdir/HHS_Hospital/HHS_Hospital_1608139617293.geojson"
)

In [54]:
medical_centers = gpd.read_file(
    "/Users/mhustiles/data/github/AGStoShapefile/backupdir/HHS_Medical_Center/HHS_Medical_Center_1608139616289.geojson"
)

### Clean up the column names so we can merge the facilities dataframes

In [55]:
facilities = [hospitals, medical_centers]

In [56]:
hospitals.rename(
    columns={
        "Name_new": "name",
        "Address_1": "address",
        "City": "city",
        "State_1": "state",
        "ZipCode": "zipcode",
        "PhoneNum": "phone",
        "County_Nam": "county",
        "Provider_N": "provider_id",
        "Hospital_T": "type",
        "Hospital_O": "operation",
        "Emergency_": "emergency",
    },
    inplace=True,
)

In [57]:
medical_centers.rename(
    columns={
        "Hospital_N": "name",
        "Address1": "address",
        "City_1": "city",
        "State_1": "state",
        "ZipCode": "zipcode",
        "PhoneNum": "phone",
        "County_Nam": "county",
        "Provider_N": "provider_id",
        "Hospital_T": "type",
        "Hospital_O": "operation",
        "Emergency_": "emergency",
    },
    inplace=True,
)

### Concatenate the different facility types into one dataframe, and also filter that just to CA

In [58]:
all_medical_geo = pd.concat(facilities)

In [59]:
all_medical_geo_ca = all_medical_geo[all_medical_geo["state"] == "CA"]

### We might be better off using HHS' own locations data with CCNs

In [60]:
locations = pd.read_csv(
    "input/hospital_locations.csv",
    dtype={"latitude": float, "longitude": float, "CCN": str},
)

In [61]:
locations.columns = (
    locations.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
    .str.replace("-", "_", regex=False)
)

### Even though it has the wrong location for White Memorial. Let's fix.

In [62]:
locations.loc[
    (locations.facility_name == "WHITE MEMORIAL MEDICAL CENTER"), "longitude"
] = -118.2176219

In [63]:
locations.loc[
    (locations.facility_name == "WHITE MEMORIAL MEDICAL CENTER"), "latitude"
] = 34.0493044

### Convert the lon/lat fields into a geodataframe

In [64]:
locations = gpd.GeoDataFrame(
    locations, geometry=gpd.points_from_xy(locations.longitude, locations.latitude)
)

### And then confine it to California

In [65]:
locationsca = gpd.GeoDataFrame(locations[locations["state"] == "CA"])

### Make the CCN string match the hospital capacity data

In [66]:
locationsca["ccn"] = locationsca["ccn"].str.zfill(6)

---

### Merge with our CA medical facilities

In [67]:
current_ca_geo = locationsca.merge(current_ca, left_on="ccn", right_on="ccn")

In [68]:
len(current_ca_geo)

346

In [69]:
current_ca_geo.rename(
    columns={
        "county_x": "county",
        "city_x": "city",
        "address_x": "address",
        "state_x": "state",
    },
    inplace=True,
)

In [70]:
# current_ca_geo.drop(['county_y', 'hospital_subtype', 'provider_id', 'ccn', 'emergency', 'address', 'OBJECTID', 'state', 'name', 'phone', 'zipcode'], axis=1, inplace=True)

---

### Filter to just LA County hospitals

In [71]:
current_la_geo = current_ca_geo[current_ca_geo["fips"] == "037"]

### How many facilities in California?

In [72]:
len(current_ca_geo)

346

### How many in L.A. County?

In [73]:
len(current_la_geo)

84

---

### Export hospital specific data for maps and tables

In [74]:
current_ca_geo.to_file("output/current_ca_geo.geojson", driver="GeoJSON")

In [75]:
current_ca_geo.to_csv("output/current_ca_geo.csv", index=False)

In [76]:
current_la_geo.to_file("output/current_la_geo.geojson", driver="GeoJSON")

In [77]:
current_la_geo.to_csv("output/current_la_geo.csv", index=False)

---

## Aggregate

### What's the average share of Covid patients in hospitals statewide?

In [78]:
state = (
    current_ca.groupby(["week"])
    .agg({"total_covid_patients": "sum", "all_patients": "sum"})
    .reset_index()
)

In [79]:
state["covid_patients_share"] = round(
    (state["total_covid_patients"] / state["all_patients"]), 2
)

In [80]:
state.head()

Unnamed: 0,week,total_covid_patients,all_patients,covid_patients_share
0,2021-01-29,14745.0,49535.0,0.3


### Covid patients by county

In [81]:
counties = (
    trimmed_timeseries.groupby(["county", "week", "region"])
    .agg({"total_covid_patients": "sum", "all_patients": "sum"})
    .reset_index()
)

In [82]:
counties["covid_patients_share"] = round(
    (counties["total_covid_patients"] / counties["all_patients"]), 2
)

#### Which counties have the most Covid patients (in most recent week)?

In [83]:
counties[counties["week"] == counties["week"].max()].sort_values(
    "total_covid_patients", ascending=False
).head(5)

Unnamed: 0,county,week,region,total_covid_patients,all_patients,covid_patients_share
481,Los Angeles,2021-01-29,Southern California,5409.0,15012.0,0.36
778,Orange,2021-01-29,Southern California,1324.0,4169.0,0.32
966,San Diego,2021-01-29,Southern California,1301.0,4317.0,0.3
859,Riverside,2021-01-29,Southern California,1023.0,2591.0,0.39
939,San Bernardino,2021-01-29,Southern California,1016.0,3013.0,0.34


#### Which counties have the highest percentage of Covid patients (in most recent week)?

In [84]:
counties[counties["week"] == counties["week"].max()].sort_values(
    "total_covid_patients", ascending=False
).head(5)

Unnamed: 0,county,week,region,total_covid_patients,all_patients,covid_patients_share
481,Los Angeles,2021-01-29,Southern California,5409.0,15012.0,0.36
778,Orange,2021-01-29,Southern California,1324.0,4169.0,0.32
966,San Diego,2021-01-29,Southern California,1301.0,4317.0,0.3
859,Riverside,2021-01-29,Southern California,1023.0,2591.0,0.39
939,San Bernardino,2021-01-29,Southern California,1016.0,3013.0,0.34


### How has this changed over time? 

In [85]:
counties_grouped = (
    counties.groupby(["county", "week", "region"])
    .agg({"total_covid_patients": "sum", "covid_patients_share": "mean"})
    .reset_index()
)

In [86]:
la_grouped = pd.DataFrame(counties_grouped[counties_grouped["county"] == "Los Angeles"])

In [87]:
la_grouped.tail()

Unnamed: 0,county,week,region,total_covid_patients,covid_patients_share
477,Los Angeles,2021-01-01,Southern California,8199.0,0.52
478,Los Angeles,2021-01-08,Southern California,8174.0,0.51
479,Los Angeles,2021-01-15,Southern California,7561.0,0.47
480,Los Angeles,2021-01-22,Southern California,6495.0,0.42
481,Los Angeles,2021-01-29,Southern California,5409.0,0.36


In [88]:
la_grouped.to_csv("output/la_grouped.csv", index=False)

In [89]:
alt.Chart(la_grouped).mark_area().encode(
    x=alt.X("week:T", axis=alt.Axis(format="", title=" ", tickCount=3)),
    y=alt.Y(
        "covid_patients_share",
        title="Covid patients in LA County hospitals",
        axis=alt.Axis(format="%", tickCount=6),
    ),
).properties(width=500, height=500)

### Just southern California counties? 

In [90]:
socal_grouped = (
    counties_grouped[counties_grouped["county"].isin(socal)]
    .groupby(["county", "week"])
    .agg({"total_covid_patients": "sum", "covid_patients_share": "mean"})
    .reset_index()
)

In [91]:
alt.Chart(socal_grouped).mark_area().encode(
    x=alt.X("week:T", axis=alt.Axis(format="", title=" ", tickCount=3)),
    y=alt.Y(
        "covid_patients_share",
        title=" ",
        axis=alt.Axis(format="%", title="", tickCount=6),
    ),
    facet=alt.Facet("county"),
).properties(
    width=200, height=200, title="Share of all patients with Covid in SoCal Counties"
)

### Bay Area? 

In [92]:
bayarea_grouped = (
    counties_grouped[counties_grouped["county"].isin(bayarea)]
    .groupby(["week"])
    .agg({"total_covid_patients": "sum", "covid_patients_share": "mean"})
    .reset_index()
)

In [93]:
bayarea_grouped.tail()

Unnamed: 0,week,total_covid_patients,covid_patients_share
22,2021-01-01,2163.0,0.254444
23,2021-01-08,2227.0,0.252222
24,2021-01-15,2147.0,0.247778
25,2021-01-22,1868.0,0.215556
26,2021-01-29,1581.0,0.186667


In [94]:
alt.Chart(bayarea_grouped).mark_area().encode(
    x=alt.X("week:T", axis=alt.Axis(format="", title=" ", tickCount=3)),
    y=alt.Y(
        "covid_patients_share",
        title=" ",
        axis=alt.Axis(
            format="%",
            title="Share of all patients with Covid in Bay Area",
            tickCount=6,
        ),
    ),
).properties(width=500, height=500)

---

### What's going on in the most-populous counties 

In [95]:
counties_pop = pd.read_csv(
    "../census/processed/acs5_2018_population_counties.csv",
    dtype={"geoid": str, "population": int, "state": str, "county": str},
)

In [96]:
counties_pop.rename(columns={"universe": "population"}, inplace=True)

In [97]:
counties_pop.drop(
    ["universe_annotation", "universe_moe", "universe_moe_annotation"],
    axis=1,
    inplace=True,
)

In [98]:
ca_counties_pop = counties_pop[counties_pop["state"] == "06"]

In [99]:
big_ca_counties_pop = ca_counties_pop.sort_values("population", ascending=False).head(
    10
)

In [100]:
big_ca_counties_pop["name"] = big_ca_counties_pop["name"].str.replace(
    " County, California", ""
)

In [101]:
big_counties = list(big_ca_counties_pop.name)

In [102]:
big_counties_grouped = (
    counties_grouped[counties_grouped["county"].isin(big_counties)]
    .groupby(["county", "week", "region"])
    .agg({"total_covid_patients": "sum", "covid_patients_share": "mean"})
    .reset_index()
)

In [103]:
chart = (
    alt.Chart(big_counties_grouped)
    .mark_area()
    .encode(
        x=alt.X(
            "week:T", axis=alt.Axis(grid=False, format="%b.", title=" ", tickCount=4)
        ),
        y=alt.Y(
            "covid_patients_share",
            title=" ",
            axis=alt.Axis(
                format="%",
                title="",
                tickCount=5,
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
            ),
        ),
        facet=alt.Facet(
            "county",
            columns=5,
            title=" ",
            sort=alt.EncodingSortField(
                "covid_patients_share", op="max", order="descending"
            ),
        ),
        color=alt.Color("region", legend=alt.Legend(orient="top", title=" ")),
    )
    .properties(
        width=200,
        height=200,
        title="Share of all hospital patients with COVID-19 in large California counties",
    )
)

chart.configure_view(strokeOpacity=0)

---

### Regions

In [104]:
regions_timeseries = (
    trimmed_timeseries.groupby(["week", "region"])
    .agg(
        {
            "total_covid_patients": "sum",
            "all_patients": "sum",
            "total_beds": "sum",
            "total_staffed_adult_icu_beds": sum,
        }
    )
    .reset_index()
)

In [105]:
regions_timeseries["covid_patients_share"] = round(
    (regions_timeseries["total_covid_patients"] / regions_timeseries["all_patients"]), 2
)

In [106]:
regions_timeseries.tail(5)

Unnamed: 0,week,region,total_covid_patients,all_patients,total_beds,total_staffed_adult_icu_beds,covid_patients_share
130,2021-01-29,Bay Area,1754.0,9252.0,13149.0,1429.0,0.19
131,2021-01-29,Greater Sacramento,600.0,3666.0,4700.0,540.0,0.16
132,2021-01-29,Northern California,73.0,750.0,1314.0,130.0,0.1
133,2021-01-29,San Joaquin Valley,1594.0,5075.0,6552.0,755.0,0.31
134,2021-01-29,Southern California,10724.0,30792.0,42684.0,5443.0,0.35


In [107]:
regions_timeseries[
    regions_timeseries["week"] == "2021-01-01"
].covid_patients_share.mean()

0.32

In [108]:
chart = (
    alt.Chart(regions_timeseries)
    .mark_area()
    .encode(
        x=alt.X(
            "week:T", axis=alt.Axis(grid=False, format="%b.", title=" ", tickCount=4)
        ),
        y=alt.Y(
            "covid_patients_share",
            title=" ",
            axis=alt.Axis(
                format="%",
                title="",
                tickCount=5,
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
            ),
        ),
        facet=alt.Facet(
            "region",
            columns=5,
            title=" ",
            sort=alt.EncodingSortField(
                "covid_patients_share", op="max", order="descending"
            ),
        ),
        #     color=alt.Color('region', legend=alt.Legend(orient="top", title=' '))
    )
    .properties(
        width=200,
        height=200,
        title="Share of all hospital patients with COVID-19 by region",
    )
)

chart.configure_view(strokeOpacity=0)

---

### Hospital overcapacity

In [109]:
before = pd.read_csv("input/hospital_utilization.csv", dtype={"fac_no": str})

In [110]:
before[before["fac_name"].str.contains("KING")]

Unnamed: 0,fac_no,fac_name,fac_city,county,license_no,fac_zip,icu_beds,tot_lic_beds
185,106191230,"MARTIN LUTHER KING, JR. COMMUNITY HOSPITAL",LOS ANGELES,Los Angeles,60000132,90059,20,131


In [111]:
current_ca[current_ca["ccn"] == "050779"]

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_beds,total_staffed_adult_icu_beds,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share,region
77,"Martin Luther King, Jr. Community Hospital",Short Term,50779,2021-01-29,Los Angeles,37,260.0,42.0,28.0,0.0,1.0,112.0,212.0,0.53,Southern California


In [112]:
before[["fac_name"]].sort_values("fac_name", ascending=False).to_csv(
    "output/before_names.csv", index=False
)

In [113]:
current_ca[["hospital"]].sort_values("hospital", ascending=False).to_csv(
    "output/current_names.csv", index=False
)