# Analyzing HHS hospital occupancy data

#### An analysis of HHS hospital data, by @datagraphics and @stiles.

#### **Questions?** [matt.stiles@latimes.com](matt.stiles@latimes.com) \\ 310.529.8749

---

### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt

%matplotlib inline
import json
import numpy as np
import altair as alt
import altair_latimes as lat

alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---

### Define cities and regions metadata

In [3]:
metadata_df = pd.read_csv("input/city-metadata.csv", dtype={"fips": str})

In [4]:
metadata_df = metadata_df[["county", "fips", "population", "region"]]

In [5]:
socal = ["Los Angeles", "Orange", "Ventura", "San Bernardino", "Riverside"]
bayarea = [
    "Alameda",
    "Contra Costa",
    "Marin",
    "Napa",
    "San Francisco",
    "San Mateo",
    "Santa Clara",
    "Solano",
    "Sonoma",
]

### Get the latest url from the HHS API and read in the latest dataframe

In [6]:
# Data dictionary: https://healthdata.gov/covid-19-reported-patient-impact-and-hospital-capacity-facility-data-dictionary
# Data source: https://healthdata.gov/dataset/covid-19-reported-patient-impact-and-hospital-capacity-facility

In [7]:
src = pd.read_csv(
    "https://healthdata.gov/api/views/anag-cw7u/rows.csv?accessType=DOWNLOAD",
    dtype={"fips_code": str, "zip": str},
    infer_datetime_format=True,
    parse_dates=True,
)

In [8]:
# src = pd.read_csv('https://healthdata.gov/sites/default/files/reported_hospital_capacity_admissions_facility_level_weekly_average_timeseries_20201228.csv',\
# dtype={'fips_code':str, 'zip':str}, infer_datetime_format=True, parse_dates=True)

In [9]:
df = src.copy()

In [10]:
ca_data = pd.DataFrame(df.loc[df.state == "CA"])

In [11]:
ca_data["fips"] = ca_data["fips_code"].str[2:5]

In [12]:
ca_timeseries = pd.merge(
    ca_data, metadata_df, how="left", left_on="fips", right_on="fips"
)

### What's the most recent collection week?

In [13]:
ca_timeseries["collection_week"] = pd.to_datetime(ca_timeseries["collection_week"])

In [14]:
ca_timeseries["collection_week"].max()

Timestamp('2021-03-19 00:00:00')

### Get rid of the -999999 suppressed values

In [15]:
ca_timeseries = ca_timeseries.replace([-999999.0], [0])

In [16]:
ca_timeseries["hospital_name"] = (ca_timeseries["hospital_name"]).str.title()
ca_timeseries["hospital_name"] = (
    (ca_timeseries["hospital_name"])
    .str.replace(" Of ", " of ")
    .str.replace("Hlth", "Health")
    .str.replace(" La", " LA")
)

---

## Calculate some hospitalization/covid rates

In [17]:
ca_timeseries.head()

Unnamed: 0,hospital_pk,collection_week,state,ccn,hospital_name,address,city,zip,hospital_subtype,fips_code,is_metro_micro,total_beds_7_day_avg,all_adult_hospital_beds_7_day_avg,all_adult_hospital_inpatient_beds_7_day_avg,inpatient_beds_used_7_day_avg,all_adult_hospital_inpatient_bed_occupied_7_day_avg,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg,total_adult_patients_hospitalized_confirmed_covid_7_day_avg,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg,total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg,inpatient_beds_7_day_avg,total_icu_beds_7_day_avg,total_staffed_adult_icu_beds_7_day_avg,icu_beds_used_7_day_avg,staffed_adult_icu_bed_occupancy_7_day_avg,...,previous_day_admission_adult_covid_confirmed_50-59_7_day_sum,previous_day_admission_adult_covid_confirmed_60-69_7_day_sum,previous_day_admission_adult_covid_confirmed_70-79_7_day_sum,previous_day_admission_adult_covid_confirmed_80+_7_day_sum,previous_day_admission_adult_covid_confirmed_unknown_7_day_sum,previous_day_admission_pediatric_covid_confirmed_7_day_sum,previous_day_covid_ED_visits_7_day_sum,previous_day_admission_adult_covid_suspected_7_day_sum,previous_day_admission_adult_covid_suspected_18-19_7_day_sum,previous_day_admission_adult_covid_suspected_20-29_7_day_sum,previous_day_admission_adult_covid_suspected_30-39_7_day_sum,previous_day_admission_adult_covid_suspected_40-49_7_day_sum,previous_day_admission_adult_covid_suspected_50-59_7_day_sum,previous_day_admission_adult_covid_suspected_60-69_7_day_sum,previous_day_admission_adult_covid_suspected_70-79_7_day_sum,previous_day_admission_adult_covid_suspected_80+_7_day_sum,previous_day_admission_adult_covid_suspected_unknown_7_day_sum,previous_day_admission_pediatric_covid_suspected_7_day_sum,previous_day_total_ED_visits_7_day_sum,previous_day_admission_influenza_confirmed_7_day_sum,geocoded_hospital_address,fips,county,population,region
0,50002,2021-03-19,CA,50002,St Rose Hospital,27200 CALAROGA AVE,HAYWARD,94545,Short Term,6001,True,95.0,95.0,95.0,53.3,53.3,0.0,0.0,0.0,0.0,95.0,14.0,14.0,8.0,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,424.0,0.0,POINT (-122.089742 37.63291),1,Alameda,1643700,Bay Area
1,50195,2021-03-19,CA,50195,Washington Hospital,2000 MOWRY AVE,FREMONT,94538,Short Term,6001,True,215.0,202.0,180.0,159.9,157.4,6.9,5.6,0.0,0.0,193.0,37.0,28.0,23.1,20.9,...,0.0,0.0,0.0,0.0,0.0,0.0,50.0,17.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,5.0,0.0,0.0,775.0,0.0,POINT (-121.98087 37.557653),1,Alameda,1643700,Bay Area
2,50320,2021-03-19,CA,50320,Highland Hospital,1411 EAST 31ST STREET,OAKLAND,94602,Short Term,6001,True,432.0,424.0,424.0,157.9,156.7,10.7,9.3,0.0,0.0,432.0,41.0,33.0,22.4,21.3,...,0.0,0.0,0.0,0.0,0.0,0.0,566.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1411.0,0.0,POINT (-122.231199 37.799234),1,Alameda,1643700,Bay Area
3,50488,2021-03-19,CA,50488,Eden Medical Center,20103 LAKE CHABOT ROAD,CASTRO VALLEY,94546,Short Term,6001,True,150.1,150.0,132.6,126.3,125.9,11.6,10.4,0.0,0.0,132.7,24.1,24.0,20.7,20.6,...,0.0,0.0,0.0,0.0,0.0,0.0,248.0,20.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,7.0,0.0,0.0,715.0,0.0,,1,Alameda,1643700,Bay Area
4,50075,2021-03-19,CA,50075,Kaiser Foundation Hospital - Oakland/Richmond,275 WEST MACARTHUR BOULEVARD,OAKLAND,94611,Short Term,6001,True,313.0,223.9,223.9,243.1,203.1,26.0,18.6,0.0,0.0,313.0,43.7,19.3,40.9,18.3,...,0.0,0.0,0.0,0.0,0.0,0.0,80.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,5.0,1053.0,0.0,POINT (-122.257589 37.823857),1,Alameda,1643700,Bay Area


#### How many beds

In [18]:
ca_timeseries["total_beds"] = round(
    (
        ca_timeseries["total_beds_7_day_sum"]
        / ca_timeseries["total_beds_7_day_coverage"]
    ),
    0,
)

#### Calculate daily average of total staffed ICU beds

In [19]:
ca_timeseries["total_staffed_adult_icu_beds"] = round(
    (
        ca_timeseries["total_staffed_adult_icu_beds_7_day_sum"]
        / ca_timeseries["total_staffed_adult_icu_beds_7_day_coverage"]
    ),
    0,
)

#### Calculate daily average of occupied ICU beds

In [20]:
ca_timeseries["total_occupied_adult_icu_beds"] = round(
    (
        ca_timeseries["staffed_adult_icu_bed_occupancy_7_day_sum"]
        / ca_timeseries["staffed_adult_icu_bed_occupancy_7_day_coverage"]
    ),
    0,
)

#### Calculate daily average of COVID patients in the ICU

In [21]:
ca_timeseries["total_covid_icu_patients"] = round(
    (
        ca_timeseries[
            "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum"
        ]
        / ca_timeseries[
            "staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_coverage"
        ]
    ),
    0,
)

#### Number of ICU beds available

In [22]:
ca_timeseries["total_available_adult_icu_beds"] = round(
    (
        ca_timeseries["total_staffed_adult_icu_beds"]
        - ca_timeseries["total_occupied_adult_icu_beds"]
    ),
    0,
)

#### Calculate daily ICU occupancy as percentage

In [23]:
ca_timeseries["pct_occupied_adult_icu_beds"] = round(
    (
        ca_timeseries["total_occupied_adult_icu_beds"]
        / ca_timeseries["total_staffed_adult_icu_beds"]
    ),
    2,
)

#### If a hospital reports 0 staffed adult ICU beds, drop them from the dataframe

In [24]:
filtered_timeseries = ca_timeseries[
    (ca_timeseries.total_staffed_adult_icu_beds_7_day_sum.notnull())
    & (ca_timeseries.total_staffed_adult_icu_beds_7_day_sum != 0)
].sort_values("total_staffed_adult_icu_beds_7_day_sum")

#### How many patients?

In [25]:
ca_timeseries["all_patients"] = round(
    (
        ca_timeseries["inpatient_beds_used_7_day_sum"]
        / ca_timeseries["inpatient_beds_used_7_day_coverage"]
    ),
    0,
)

#### How many Covid patients

In [26]:
ca_timeseries["total_adult_covid_patients"] = (
    ca_timeseries[
        "total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum"
    ]
    / ca_timeseries[
        "total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage"
    ]
)

In [27]:
ca_timeseries["total_pediatric_covid_patients"] = (
    ca_timeseries[
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum"
    ]
    / ca_timeseries[
        "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage"
    ]
)

In [28]:
ca_timeseries["total_covid_patients"] = round(
    ca_timeseries["total_adult_covid_patients"]
    + ca_timeseries["total_pediatric_covid_patients"],
    0,
).fillna(0)

#### What's the rate of Covid patients?

In [29]:
ca_timeseries["covid_patients_share"] = round(
    (ca_timeseries["total_covid_patients"] / ca_timeseries["all_patients"]), 2
)

In [30]:
ca_timeseries = ca_timeseries[ca_timeseries["all_patients"] > 0]

---

### Trim to the columns we want

In [31]:
trimmed_timeseries = ca_timeseries[
    [
        "hospital_name",
        "hospital_subtype",
        "ccn",
        "collection_week",
        "county",
        "fips",
        "total_beds",
        "total_staffed_adult_icu_beds",
        "total_covid_icu_patients",
        "total_available_adult_icu_beds",
        "pct_occupied_adult_icu_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
        "region",
    ]
].rename(
    columns={
        "hospital_name": "hospital",
        "collection_week": "week",
    }
)

In [32]:
trimmed_timeseries.covid_patients_share.mean()

0.1728466351031367

In [33]:
trimmed_timeseries.to_csv("output/trimmed_timeseries.csv", index=False)

---

### Filter the dataframe for Los Angeles County facilities

In [34]:
la = trimmed_timeseries[trimmed_timeseries["fips"] == "037"]
oc = trimmed_timeseries[trimmed_timeseries["fips"] == "059"]

### Filter the California dataframe to the most recent collection week

In [35]:
current_ca = trimmed_timeseries[
    trimmed_timeseries["week"] == trimmed_timeseries["week"].max()
].sort_values("covid_patients_share", ascending=False)

In [36]:
current_la = la[la["week"] == la["week"].max()].sort_values(
    "covid_patients_share", ascending=False
)

In [37]:
current_oc = oc[oc["week"] == oc["week"].max()].sort_values(
    "covid_patients_share", ascending=False
)

In [38]:
current_ca.to_csv("output/current_ca.csv", index=False)

### Specific place?

In [39]:
current_ca[current_ca["hospital"].str.contains("Methodist Hospital")].iloc[0]

hospital                          Methodist Hospital of Sacramento
hospital_subtype                                        Short Term
ccn                                                         050590
week                                           2021-03-19 00:00:00
county                                                  Sacramento
fips                                                           067
total_beds                                                     206
total_staffed_adult_icu_beds                                    20
total_covid_icu_patients                                         1
total_available_adult_icu_beds                                  13
pct_occupied_adult_icu_beds                                   0.35
total_covid_patients                                             6
all_patients                                                    87
covid_patients_share                                          0.07
region                                          Greater Sacram

### Which CA hospitals have the greatest share of covid patients? 

In [40]:
current_ca[
    (current_ca["hospital_subtype"] != "Long Term")
    & (current_ca["total_covid_icu_patients"] > 0)
][
    [
        "hospital",
        "total_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
        "region",
        "county",
    ]
].sort_values(
    "covid_patients_share", ascending=False
).head(
    10
)

Unnamed: 0,hospital,total_beds,total_covid_patients,all_patients,covid_patients_share,region,county
333,Oak Valley Hospital District,22.0,5.0,10.0,0.5,San Joaquin Valley,Stanislaus
145,Madera Community Hospital,80.0,13.0,30.0,0.43,San Joaquin Valley,Madera
155,Memorial Hospital Los Banos,41.0,7.0,20.0,0.35,San Joaquin Valley,Merced
41,Mad River Community Hospital,41.0,5.0,17.0,0.29,Northern California,Humboldt
277,Kaiser Foundation Hospital Manteca,183.0,36.0,157.0,0.23,San Joaquin Valley,San Joaquin
52,Adventist Health Tehachapi Valley,31.0,3.0,14.0,0.21,San Joaquin Valley,Kern
340,Sierra View Medical Center,149.0,13.0,62.0,0.21,San Joaquin Valley,Tulare
184,Garden Grove Hospital & Medical Center,55.0,7.0,33.0,0.21,Southern California,Orange
122,Providence Little Co of Mary Med Ctr San Pedro,76.0,10.0,53.0,0.19,Southern California,Los Angeles
208,Corona Regional Medical Center,172.0,20.0,104.0,0.19,Southern California,Riverside


### Which facilities in LA have the greatest share of covid patients? 

In [41]:
current_la[
    (current_la["hospital_subtype"] != "Long Term")
    & (current_la["total_covid_icu_patients"] > 0)
][
    [
        "hospital",
        "total_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
    ]
].sort_values(
    "covid_patients_share", ascending=False
).head(
    10
)

Unnamed: 0,hospital,total_beds,total_covid_patients,all_patients,covid_patients_share
122,Providence Little Co of Mary Med Ctr San Pedro,76.0,10.0,53.0,0.19
139,"Martin Luther King, Jr. Community Hospital",184.0,19.0,109.0,0.17
137,Cedar-Sinai Marina Del Rey Hospital,91.0,8.0,57.0,0.14
133,West Hills Hospital & Medical Center,261.0,14.0,98.0,0.14
71,Whittier Hospital Medical Center,174.0,11.0,85.0,0.13
121,Monterey Park Hospital,96.0,6.0,52.0,0.12
69,Lac/Rancho Los Amigos National Rehabilitation ...,137.0,14.0,118.0,0.12
113,Kaiser Foundation Hospital - West LA,219.0,15.0,126.0,0.12
73,Kaiser Foundation Hospital - Downey,354.0,21.0,188.0,0.11
68,Keck Hospital of Usc,401.0,27.0,245.0,0.11


### Which places have the lowest share? 

In [42]:
current_la[
    (current_la["hospital_subtype"] != "Long Term")
    & (current_la["total_covid_icu_patients"] > 0)
][
    [
        "hospital",
        "total_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
    ]
].sort_values(
    "covid_patients_share", ascending=False
).tail(
    10
)

Unnamed: 0,hospital,total_beds,total_covid_patients,all_patients,covid_patients_share
120,Antelope Valley Hospital,217.0,8.0,215.0,0.04
138,Providence-Cedars Sinai Tarzana Medical Center,165.0,3.0,117.0,0.03
83,Garfield Medical Center,210.0,4.0,120.0,0.03
134,Mission Community Hospital,237.0,5.0,153.0,0.03
111,City of Hope Helford Clinical Research Hospital,231.0,7.0,208.0,0.03
101,Pih Health Hospital-Whittier,398.0,7.0,237.0,0.03
107,Ronald Reagan U C L A Medical Center,460.0,11.0,389.0,0.03
104,Santa Monica - Ucla Med Ctr & Orthopaedic Hosp...,296.0,7.0,217.0,0.03
61,St Mary Medical Center,406.0,5.0,151.0,0.03
80,Cedars-Sinai Medical Center,842.0,18.0,749.0,0.02


### Which facilities in OC have the greatest share of covid patients? 

In [43]:
current_oc[
    (current_oc["hospital_subtype"] != "Long Term")
    & (current_oc["total_covid_icu_patients"] > 0)
][
    ["hospital", "total_covid_patients", "all_patients", "covid_patients_share"]
].sort_values(
    "covid_patients_share", ascending=False
).head(
    5
)

Unnamed: 0,hospital,total_covid_patients,all_patients,covid_patients_share
184,Garden Grove Hospital & Medical Center,7.0,33.0,0.21
173,Kaiser Foundation Hospital - Orange County - A...,26.0,260.0,0.1
175,Providence St. Joseph Hospital,23.0,230.0,0.1
178,Los Alamitos Medical Center,13.0,134.0,0.1
190,Fountain Valley Regional Hospital & Medical Ce...,14.0,199.0,0.07


In [44]:
current_oc[
    (current_oc["hospital_subtype"] != "Long Term")
    & (current_oc["total_covid_icu_patients"] > 0)
][
    [
        "hospital",
        "total_beds",
        "total_covid_patients",
        "all_patients",
        "covid_patients_share",
    ]
].sort_values(
    "covid_patients_share", ascending=False
).tail(
    10
)

Unnamed: 0,hospital,total_beds,total_covid_patients,all_patients,covid_patients_share
177,Huntington Beach Hospital,70.0,3.0,50.0,0.06
193,Providence Mission Hospital,258.0,10.0,193.0,0.05
188,Orange County Global Medical Center,337.0,7.0,183.0,0.04
185,West Anaheim Medical Center,137.0,4.0,96.0,0.04
168,Anaheim Global Medical Center,189.0,5.0,113.0,0.04
191,La Palma Intercommunity Hospital,39.0,1.0,27.0,0.04
189,Foothill Regional Medical Center,130.0,1.0,30.0,0.03
194,University of California Irvine Medical Center,409.0,10.0,333.0,0.03
182,Hoag Memorial Hospital Presbyterian,395.0,11.0,355.0,0.03
183,Memorialcare Saddleback Medical Center,228.0,1.0,111.0,0.01


In [45]:
current_la["covid_patients_share"] = round(
    (current_la["covid_patients_share"] * 100), 2
)

### Output top LA hospitals for CMS table

In [46]:
current_la[
    (current_la["hospital_subtype"] != "Long Term")
    & (current_la["total_covid_icu_patients"] > 0)
][["hospital", "total_covid_patients", "covid_patients_share"]].sort_values(
    "covid_patients_share", ascending=False
).head(
    10
).to_csv(
    "output/current_la_hospitals.csv", index=False
)

### Which facilities in LA have the highest ICU capacity rates? 

In [47]:
current_la[(current_la["total_covid_icu_patients"] > 0)].sort_values(
    "pct_occupied_adult_icu_beds", ascending=False
).head(10)

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_beds,total_staffed_adult_icu_beds,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share,region
119,Kindred Hospital South Bay,Long Term,52050,2021-03-19,Los Angeles,37,59.0,9.0,1.0,0.0,1.0,11.0,49.0,22.0,Southern California
60,Community Hospital of Huntington Park,Short Term,50091,2021-03-19,Los Angeles,37,65.0,6.0,1.0,0.0,1.0,3.0,30.0,10.0,Southern California
138,Providence-Cedars Sinai Tarzana Medical Center,Short Term,50761,2021-03-19,Los Angeles,37,165.0,9.0,1.0,0.0,1.0,3.0,117.0,3.0,Southern California
143,Kindred Hospital Paramount,Long Term,52046,2021-03-19,Los Angeles,37,177.0,12.0,1.0,0.0,1.0,3.0,84.0,4.0,Southern California
122,Providence Little Co of Mary Med Ctr San Pedro,Short Term,50078,2021-03-19,Los Angeles,37,76.0,9.0,4.0,0.0,1.0,10.0,53.0,19.0,Southern California
144,Glendale Mem Hospital & Health Center,Short Term,50058,2021-03-19,Los Angeles,37,334.0,12.0,2.0,0.0,1.0,9.0,112.0,8.0,Southern California
64,Memorial Hospital of Gardena,Short Term,50468,2021-03-19,Los Angeles,37,172.0,10.0,1.0,0.0,1.0,10.0,112.0,9.0,Southern California
120,Antelope Valley Hospital,Short Term,50056,2021-03-19,Los Angeles,37,217.0,30.0,1.0,1.0,0.97,8.0,215.0,4.0,Southern California
102,Providence Holy Cross Medical Center,Short Term,50278,2021-03-19,Los Angeles,37,294.0,28.0,8.0,1.0,0.96,21.0,249.0,8.0,Southern California
81,Henry Mayo Newhall Hospital,Short Term,50624,2021-03-19,Los Angeles,37,378.0,19.0,1.0,1.0,0.95,9.0,180.0,5.0,Southern California


### How many LA County hospitals (with Covid patients) are in our dataframe? 

In [48]:
len(current_la[(current_la["total_covid_icu_patients"] > 0)])

63

### How many have 'high' ICU occupancy rates? 

In [49]:
len(current_la[current_la["pct_occupied_adult_icu_beds"] > 0.90])

21

### Top 10 Bay Area hospotals by Covid share? 

In [50]:
current_ca[current_ca["county"].isin(bayarea)].sort_values(
    "covid_patients_share", ascending=False
).head(10)

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_beds,total_staffed_adult_icu_beds,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share,region
300,Kaiser Foundation Hospital-San Jose,Short Term,50604,2021-03-19,Santa Clara,85,158.0,19.0,9.0,2.0,0.89,25.0,135.0,0.19,Bay Area
290,Kaiser Foundation Hospital - South San Francisco,Short Term,50070,2021-03-19,San Mateo,81,88.0,7.0,1.0,1.0,0.86,13.0,76.0,0.17,Bay Area
149,Kaiser Foundation Hospital,Short Term,50510,2021-03-19,Marin,41,63.0,7.0,3.0,1.0,0.86,8.0,55.0,0.15,Bay Area
7,Kaiser Foundation Hospital - Fremont,Short Term,50512,2021-03-19,Alameda,1,90.0,8.0,2.0,1.0,0.88,11.0,76.0,0.14,Bay Area
292,San Mateo Medical Center,Short Term,50113,2021-03-19,San Mateo,81,46.0,4.0,1.0,0.0,1.0,4.0,31.0,0.13,Bay Area
12,Kaiser Foundation Hospital - San Leandro,Short Term,50777,2021-03-19,Alameda,1,180.0,14.0,4.0,1.0,0.93,19.0,149.0,0.13,Bay Area
299,Regional Medical Center of San Jose,Short Term,50125,2021-03-19,Santa Clara,85,237.0,34.0,5.0,2.0,0.94,19.0,154.0,0.12,Bay Area
4,Kaiser Foundation Hospital - Oakland/Richmond,Short Term,50075,2021-03-19,Alameda,1,313.0,19.0,6.0,1.0,0.95,28.0,243.0,0.12,Bay Area
11,Stanford Health Care - Valleycare,Short Term,50283,2021-03-19,Alameda,1,257.0,22.0,3.0,5.0,0.77,10.0,87.0,0.11,Bay Area
3,Eden Medical Center,Short Term,50488,2021-03-19,Alameda,1,150.0,24.0,4.0,3.0,0.88,12.0,126.0,0.1,Bay Area


---

## Get medical facilities' geographic data

In [51]:
# From HHS: https://maps3.arcgisonline.com/arcgis/rest/services/A-16/HHS_IOM_Health_Resources/MapServer

In [52]:
hospitals = gpd.read_file(
    "/Users/mhustiles/data/github/AGStoShapefile/backupdir/HHS_Hospital/HHS_Hospital_1608139617293.geojson"
)

In [53]:
medical_centers = gpd.read_file(
    "/Users/mhustiles/data/github/AGStoShapefile/backupdir/HHS_Medical_Center/HHS_Medical_Center_1608139616289.geojson"
)

### Clean up the column names so we can merge the facilities dataframes

In [54]:
facilities = [hospitals, medical_centers]

In [55]:
hospitals.rename(
    columns={
        "Name_new": "name",
        "Address_1": "address",
        "City": "city",
        "State_1": "state",
        "ZipCode": "zipcode",
        "PhoneNum": "phone",
        "County_Nam": "county",
        "Provider_N": "provider_id",
        "Hospital_T": "type",
        "Hospital_O": "operation",
        "Emergency_": "emergency",
    },
    inplace=True,
)

In [56]:
medical_centers.rename(
    columns={
        "Hospital_N": "name",
        "Address1": "address",
        "City_1": "city",
        "State_1": "state",
        "ZipCode": "zipcode",
        "PhoneNum": "phone",
        "County_Nam": "county",
        "Provider_N": "provider_id",
        "Hospital_T": "type",
        "Hospital_O": "operation",
        "Emergency_": "emergency",
    },
    inplace=True,
)

### Concatenate the different facility types into one dataframe, and also filter that just to CA

In [57]:
all_medical_geo = pd.concat(facilities)

In [58]:
all_medical_geo_ca = all_medical_geo[all_medical_geo["state"] == "CA"]

### We might be better off using HHS' own locations data with CCNs

In [59]:
locations = pd.read_csv(
    "input/hospital_locations.csv",
    dtype={"latitude": float, "longitude": float, "CCN": str},
)

In [60]:
locations.columns = (
    locations.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
    .str.replace("-", "_", regex=False)
)

### Even though it has the wrong location for White Memorial. Let's fix.

In [61]:
locations.loc[
    (locations.facility_name == "WHITE MEMORIAL MEDICAL CENTER"), "longitude"
] = -118.2176219

In [62]:
locations.loc[
    (locations.facility_name == "WHITE MEMORIAL MEDICAL CENTER"), "latitude"
] = 34.0493044

### Convert the lon/lat fields into a geodataframe

In [63]:
locations = gpd.GeoDataFrame(
    locations, geometry=gpd.points_from_xy(locations.longitude, locations.latitude)
)

### And then confine it to California

In [64]:
locationsca = gpd.GeoDataFrame(locations[locations["state"] == "CA"])

### Make the CCN string match the hospital capacity data

In [65]:
locationsca["ccn"] = locationsca["ccn"].str.zfill(6)

---

### Merge with our CA medical facilities

In [66]:
current_ca_geo = locationsca.merge(current_ca, left_on="ccn", right_on="ccn")

In [67]:
len(current_ca_geo)

348

In [68]:
current_ca_geo.rename(
    columns={
        "county_x": "county",
        "city_x": "city",
        "address_x": "address",
        "state_x": "state",
    },
    inplace=True,
)

In [69]:
# current_ca_geo.drop(['county_y', 'hospital_subtype', 'provider_id', 'ccn', 'emergency', 'address', 'OBJECTID', 'state', 'name', 'phone', 'zipcode'], axis=1, inplace=True)

---

### Filter to just LA County hospitals

In [70]:
current_la_geo = current_ca_geo[current_ca_geo["fips"] == "037"]

### How many facilities in California?

In [71]:
len(current_ca_geo)

348

### How many in L.A. County?

In [72]:
len(current_la_geo)

85

---

### Export hospital specific data for maps and tables

In [73]:
current_ca_geo.to_file("output/current_ca_geo.geojson", driver="GeoJSON")

In [74]:
current_ca_geo.to_csv("output/current_ca_geo.csv", index=False)

In [75]:
current_la_geo.to_file("output/current_la_geo.geojson", driver="GeoJSON")

In [76]:
current_la_geo.to_csv("output/current_la_geo.csv", index=False)

---

## Aggregate

### What's the average share of Covid patients in hospitals statewide?

In [77]:
state = (
    current_ca.groupby(["week"])
    .agg({"total_covid_patients": "sum", "all_patients": "sum"})
    .reset_index()
)

In [78]:
state["covid_patients_share"] = round(
    (state["total_covid_patients"] / state["all_patients"]), 2
)

In [79]:
state.head()

Unnamed: 0,week,total_covid_patients,all_patients,covid_patients_share
0,2021-03-19,2950.0,43761.0,0.07


### Covid patients by county

In [80]:
counties = (
    trimmed_timeseries.groupby(["county", "week", "region"])
    .agg({"total_covid_patients": "sum", "all_patients": "sum"})
    .reset_index()
)

In [81]:
counties["covid_patients_share"] = round(
    (counties["total_covid_patients"] / counties["all_patients"]), 2
)

#### Which counties have the most Covid patients (in most recent week)?

In [82]:
counties[counties["week"] == counties["week"].max()].sort_values(
    "total_covid_patients", ascending=False
).head(5)

Unnamed: 0,county,week,region,total_covid_patients,all_patients,covid_patients_share
607,Los Angeles,2021-03-19,Southern California,864.0,12934.0,0.07
1218,San Diego,2021-03-19,Southern California,248.0,3867.0,0.06
981,Orange,2021-03-19,Southern California,182.0,3442.0,0.05
1184,San Bernardino,2021-03-19,Southern California,178.0,2439.0,0.07
1083,Riverside,2021-03-19,Southern California,169.0,2109.0,0.08


#### Which counties have the highest percentage of Covid patients (in most recent week)?

In [83]:
counties[counties["week"] == counties["week"].max()].sort_values(
    "total_covid_patients", ascending=False
).head(5)

Unnamed: 0,county,week,region,total_covid_patients,all_patients,covid_patients_share
607,Los Angeles,2021-03-19,Southern California,864.0,12934.0,0.07
1218,San Diego,2021-03-19,Southern California,248.0,3867.0,0.06
981,Orange,2021-03-19,Southern California,182.0,3442.0,0.05
1184,San Bernardino,2021-03-19,Southern California,178.0,2439.0,0.07
1083,Riverside,2021-03-19,Southern California,169.0,2109.0,0.08


### How has this changed over time? 

In [84]:
counties_grouped = (
    counties.groupby(["county", "week", "region"])
    .agg({"total_covid_patients": "sum", "covid_patients_share": "mean"})
    .reset_index()
)

In [85]:
la_grouped = pd.DataFrame(counties_grouped[counties_grouped["county"] == "Los Angeles"])

In [86]:
la_grouped.tail()

Unnamed: 0,county,week,region,total_covid_patients,covid_patients_share
603,Los Angeles,2021-02-19,Southern California,2282.0,0.17
604,Los Angeles,2021-02-26,Southern California,1639.0,0.13
605,Los Angeles,2021-03-05,Southern California,1237.0,0.1
606,Los Angeles,2021-03-12,Southern California,1035.0,0.08
607,Los Angeles,2021-03-19,Southern California,864.0,0.07


In [87]:
la_grouped.to_csv("output/la_grouped.csv", index=False)

In [88]:
alt.Chart(la_grouped).mark_area().encode(
    x=alt.X("week:T", axis=alt.Axis(format="", title=" ", tickCount=3)),
    y=alt.Y(
        "covid_patients_share",
        title="Covid patients in LA County hospitals",
        axis=alt.Axis(format="%", tickCount=6),
    ),
).properties(width=500, height=500)

### Just southern California counties? 

In [89]:
socal_grouped = (
    counties_grouped[counties_grouped["county"].isin(socal)]
    .groupby(["county", "week"])
    .agg({"total_covid_patients": "sum", "covid_patients_share": "mean"})
    .reset_index()
)

In [90]:
alt.Chart(socal_grouped).mark_area().encode(
    x=alt.X("week:T", axis=alt.Axis(format="", title=" ", tickCount=3)),
    y=alt.Y(
        "covid_patients_share",
        title=" ",
        axis=alt.Axis(format="%", title="", tickCount=6),
    ),
    facet=alt.Facet("county"),
).properties(
    width=200, height=200, title="Share of all patients with Covid in SoCal Counties"
)

### Bay Area? 

In [91]:
bayarea_grouped = (
    counties_grouped[counties_grouped["county"].isin(bayarea)]
    .groupby(["week"])
    .agg({"total_covid_patients": "sum", "covid_patients_share": "mean"})
    .reset_index()
)

In [92]:
bayarea_grouped.tail()

Unnamed: 0,week,total_covid_patients,covid_patients_share
29,2021-02-19,834.0,0.104444
30,2021-02-26,696.0,0.085556
31,2021-03-05,561.0,0.072222
32,2021-03-12,506.0,0.062222
33,2021-03-19,429.0,0.052222


In [93]:
alt.Chart(bayarea_grouped).mark_area().encode(
    x=alt.X("week:T", axis=alt.Axis(format="", title=" ", tickCount=3)),
    y=alt.Y(
        "covid_patients_share",
        title=" ",
        axis=alt.Axis(
            format="%",
            title="Share of all patients with Covid in Bay Area",
            tickCount=6,
        ),
    ),
).properties(width=500, height=500)

---

### What's going on in the most-populous counties 

In [94]:
counties_pop = pd.read_csv(
    "../census/processed/acs5_2018_population_counties.csv",
    dtype={"geoid": str, "population": int, "state": str, "county": str},
)

In [95]:
counties_pop.rename(columns={"universe": "population"}, inplace=True)

In [96]:
counties_pop.drop(
    ["universe_annotation", "universe_moe", "universe_moe_annotation"],
    axis=1,
    inplace=True,
)

In [97]:
ca_counties_pop = counties_pop[counties_pop["state"] == "06"]

In [98]:
big_ca_counties_pop = ca_counties_pop.sort_values("population", ascending=False).head(
    10
)

In [99]:
big_ca_counties_pop["name"] = big_ca_counties_pop["name"].str.replace(
    " County, California", ""
)

In [100]:
big_counties = list(big_ca_counties_pop.name)

In [101]:
big_counties_grouped = (
    counties_grouped[counties_grouped["county"].isin(big_counties)]
    .groupby(["county", "week", "region"])
    .agg({"total_covid_patients": "sum", "covid_patients_share": "mean"})
    .reset_index()
)

In [102]:
chart = (
    alt.Chart(big_counties_grouped)
    .mark_area()
    .encode(
        x=alt.X(
            "week:T", axis=alt.Axis(grid=False, format="%b.", title=" ", tickCount=4)
        ),
        y=alt.Y(
            "covid_patients_share",
            title=" ",
            axis=alt.Axis(
                format="%",
                title="",
                tickCount=5,
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
            ),
        ),
        facet=alt.Facet(
            "county",
            columns=5,
            title=" ",
            sort=alt.EncodingSortField(
                "covid_patients_share", op="max", order="descending"
            ),
        ),
        color=alt.Color("region", legend=alt.Legend(orient="top", title=" ")),
    )
    .properties(
        width=200,
        height=200,
        title="Share of all hospital patients with COVID-19 in large California counties",
    )
)

chart.configure_view(strokeOpacity=0)

---

### Regions

In [103]:
regions_timeseries = (
    trimmed_timeseries.groupby(["week", "region"])
    .agg(
        {
            "total_covid_patients": "sum",
            "all_patients": "sum",
            "total_beds": "sum",
            "total_staffed_adult_icu_beds": sum,
        }
    )
    .reset_index()
)

In [104]:
regions_timeseries["covid_patients_share"] = round(
    (regions_timeseries["total_covid_patients"] / regions_timeseries["all_patients"]), 2
)

In [105]:
regions_timeseries.tail(5)

Unnamed: 0,week,region,total_covid_patients,all_patients,total_beds,total_staffed_adult_icu_beds,covid_patients_share
165,2021-03-19,Bay Area,460.0,8688.0,13143.0,1327.0,0.05
166,2021-03-19,Greater Sacramento,172.0,3483.0,4599.0,492.0,0.05
167,2021-03-19,Northern California,26.0,719.0,1322.0,128.0,0.04
168,2021-03-19,San Joaquin Valley,529.0,4630.0,6294.0,638.0,0.11
169,2021-03-19,Southern California,1763.0,26241.0,40384.0,4425.0,0.07


In [106]:
regions_timeseries[
    regions_timeseries["week"] == "2021-01-01"
].covid_patients_share.mean()

0.32

In [107]:
chart = (
    alt.Chart(regions_timeseries)
    .mark_area()
    .encode(
        x=alt.X(
            "week:T", axis=alt.Axis(grid=False, format="%b.", title=" ", tickCount=4)
        ),
        y=alt.Y(
            "covid_patients_share",
            title=" ",
            axis=alt.Axis(
                format="%",
                title="",
                tickCount=5,
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
            ),
        ),
        facet=alt.Facet(
            "region",
            columns=5,
            title=" ",
            sort=alt.EncodingSortField(
                "covid_patients_share", op="max", order="descending"
            ),
        ),
        #     color=alt.Color('region', legend=alt.Legend(orient="top", title=' '))
    )
    .properties(
        width=200,
        height=200,
        title="Share of all hospital patients with COVID-19 by region",
    )
)

chart.configure_view(strokeOpacity=0)

---

### Hospital overcapacity

In [108]:
before = pd.read_csv("input/hospital_utilization.csv", dtype={"fac_no": str})

In [109]:
before[before["fac_name"].str.contains("KING")]

Unnamed: 0,fac_no,fac_name,fac_city,county,license_no,fac_zip,icu_beds,tot_lic_beds
185,106191230,"MARTIN LUTHER KING, JR. COMMUNITY HOSPITAL",LOS ANGELES,Los Angeles,60000132,90059,20,131


In [110]:
current_ca[current_ca["ccn"] == "050779"]

Unnamed: 0,hospital,hospital_subtype,ccn,week,county,fips,total_beds,total_staffed_adult_icu_beds,total_covid_icu_patients,total_available_adult_icu_beds,pct_occupied_adult_icu_beds,total_covid_patients,all_patients,covid_patients_share,region
139,"Martin Luther King, Jr. Community Hospital",Short Term,50779,2021-03-19,Los Angeles,37,184.0,20.0,1.0,14.0,0.3,19.0,109.0,0.17,Southern California


In [111]:
before[["fac_name"]].sort_values("fac_name", ascending=False).to_csv(
    "output/before_names.csv", index=False
)

In [112]:
current_ca[["hospital"]].sort_values("hospital", ascending=False).to_csv(
    "output/current_names.csv", index=False
)