# CDC estimated inpatient beds time series

* [Estimated inpatient beds](https://healthdata.gov/sites/default/files/estimated_inpatient_all_20210221_1631.csv)
* [Estimated inpatient beds in use by COVID-19 patients](https://healthdata.gov/sites/default/files/estimated_inpatient_covid_20210221_1631.csv)
* [Estimated ICU beds](https://healthdata.gov/sites/default/files/estimated_icu_20210221_1631.csv)

In [None]:
import pandas as pd
import numpy as np
import pycountry
import json
from datetime import datetime
from functools import reduce
import requests
from io import StringIO
import re
from bs4 import BeautifulSoup
import requests

In [None]:
# papermill parameters
# Directory that receives the CSV exports written by the last cell of this
# notebook. Keep the trailing slash so the value also works with plain string
# concatenation.
output_folder = "../output/"

In [None]:
# root_url = "https://healthdata.gov/dataset/covid-19-estimated-patient-impact-and-hospital-capacity-state"
# states = [subdivision.code.replace("US-", "") for subdivision in pycountry.subdivisions if subdivision.code.startswith("US-")]
# response = requests.get(root_url)
# assert response.status_code == 200
# soup = BeautifulSoup(response.content)
# links = soup.find_all("a", {"class": "data-link"})
# assert len(links)
# df_list = []
# for l in links:
#     url = l["href"]
#     print(url)
#     df = pd.read_csv(url)
#     df["ISO3166_1"] = "US"
#     df["ISO3166_2"] = df["state"]
    
#     df["Last_Reported_Flag"] = df["collection_date"].max() == df["collection_date"]
#     df = df[df["ISO3166_2"].isin(states)]
#     df_list.append(df)

In [None]:
# Subdivision codes that pycountry lists under the "US-" prefix, with that
# prefix stripped. Used further down to keep only rows whose `state` value
# is one of these codes.
_subdivision_codes = (entry.code for entry in pycountry.subdivisions)
states = [
    code.replace("US-", "")
    for code in _subdivision_codes
    if code.startswith("US-")
]

In [None]:
# CSV export endpoints on healthdata.gov, one per dataset.
url_covid_19_inpatient_beds = "https://healthdata.gov/api/views/py8k-j5rq/rows.csv?accessType=DOWNLOAD"
url_occupied_icu_beds = "https://healthdata.gov/api/views/7ctx-gtb7/rows.csv?accessType=DOWNLOAD"
url_inpatient_beds = "https://healthdata.gov/api/views/jjp9-htie/rows.csv?accessType=DOWNLOAD"

# Dataset landing pages matching the endpoints above (kept for reference;
# nothing in this cell reads them).
datasource_covid_19_inpatient_beds = "https://healthdata.gov/dataset/COVID-19-Estimated-Inpatient-Beds-Occupied-by-COVI/py8k-j5rq"
datasource_occupied_icu_beds = "https://healthdata.gov/dataset/COVID-19-Estimated-ICU-Beds-Occupied-by-State-Time/7ctx-gtb7"
datasource_inpatient_beds = "https://healthdata.gov/dataset/COVID-19-Estimated-Inpatient-Beds-Occupied-by-Stat/jjp9-htie"

# Download each export straight into a DataFrame.
covid_19_inpatient_beds = pd.read_csv(url_covid_19_inpatient_beds)
occupied_icu_beds = pd.read_csv(url_occupied_icu_beds)
inpatient_beds = pd.read_csv(url_inpatient_beds)

In [None]:
def _prepare_state_frame(frame, valid_states):
    """Tag a raw healthdata.gov frame with ISO codes and keep known states.

    Appends three columns, in this order, after the existing ones:
    ``ISO3166_1`` (constant ``"US"``), ``ISO3166_2`` (copy of ``state``) and
    ``Last_Reported_Flag`` (True where ``collection_date`` equals the
    maximum ``collection_date`` of the whole input frame). The
    ``geocoded_state`` column is then dropped and only rows whose
    ``ISO3166_2`` is in ``valid_states`` are kept.

    The column order matters: the next cell renames columns by position.

    Args:
        frame: DataFrame with at least ``state``, ``collection_date`` and
            ``geocoded_state`` columns. It is not modified.
        valid_states: Collection of accepted ``state`` values.

    Returns:
        A new, independent DataFrame (explicit copy), so later column
        assignments do not trigger pandas' SettingWithCopyWarning.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # The flag is computed before filtering, so the maximum is taken over
    # all rows, including those dropped by the state filter below.
    latest_collection_date = frame["collection_date"].max()
    tagged = frame.assign(
        ISO3166_1="US",
        ISO3166_2=frame["state"],
        Last_Reported_Flag=frame["collection_date"] == latest_collection_date,
    ).drop(columns=["geocoded_state"])
    return tagged[tagged["ISO3166_2"].isin(valid_states)].copy()


covid_19_inpatient_beds = _prepare_state_frame(covid_19_inpatient_beds, states)
occupied_icu_beds = _prepare_state_frame(occupied_icu_beds, states)
inpatient_beds = _prepare_state_frame(inpatient_beds, states)

In [None]:
# Output headers are assigned by position, so each list must have exactly as
# many entries, in the same order, as the frame it is applied to.
_INPATIENT_HEADER = [
    "STATE",
    "DATE",
    "INPATIENT_BEDS_OCCUPIED",
    "INPATIENT_BEDS_LOWER_BOUND",
    "INPATIENT_BEDS_UPPER_BOUND",
    "INPATIENT_BEDS_IN_USE_PCT",
    "INPATIENT_BEDS_IN_USE_PCT_LOWER_BOUND",
    "INPATIENT_BEDS_IN_USE_PCT_UPPER_BOUND",
    "TOTAL_INPATIENT_BEDS",
    "TOTAL_INPATIENT_BEDS_LOWER_BOUND",
    "TOTAL_INPATIENT_BEDS_UPPER_BOUND",
    "ISO3166_1",
    "ISO3166_2",
    "LAST_REPORTED_FLAG",
]
_ICU_HEADER = [
    "STATE",
    "DATE",
    "STAFFED_ADULT_ICU_BEDS_OCCUPIED",
    "STAFFED_ADULT_ICU_BEDS_OCCUPIED_LOWER_BOUND",
    "STAFFED_ADULT_ICU_BEDS_OCCUPIED_UPPER_BOUND",
    "STAFFED_ADULT_ICU_BEDS_OCCUPIED_PCT",
    "STAFFED_ADULT_ICU_BEDS_OCCUPIED_PCT_LOWER_BOUND",
    "STAFFED_ADULT_ICU_BEDS_OCCUPIED_PCT_UPPER_BOUND",
    "TOTAL_STAFFED_ICU_BEDS",
    "TOTAL_STAFFED_ICU_BEDS_LOWER_BOUND",
    "TOTAL_STAFFED_ICU_BEDS_UPPER_BOUND",
    "ISO3166_1",
    "ISO3166_2",
    "LAST_REPORTED_FLAG",
]

inpatient_beds.columns = _INPATIENT_HEADER
# The COVID-19 inpatient frame reuses the all-inpatient header unchanged.
covid_19_inpatient_beds.columns = inpatient_beds.columns
occupied_icu_beds.columns = _ICU_HEADER

In [None]:
# Every column whose name contains "BEDS" but not "PCT" is cast to int after
# removing any "," characters from its values. Percentage columns are left
# untouched.
for _frame in (inpatient_beds, covid_19_inpatient_beds, occupied_icu_beds):
    _count_columns = [
        name
        for name in _frame.columns
        if "BEDS" in name.upper() and "PCT" not in name.upper()
    ]
    for column in _count_columns:
        _without_commas = _frame[column].replace(",", "", regex=True)
        _frame[column] = _without_commas.astype(int)
    

```sql
-- Column list mirrors the header of CDC_INPATIENT_BEDS_ALL.csv written by this notebook.
CREATE TABLE CDC_INPATIENT_BEDS_ALL (
  STATE varchar,
  DATE timestamp_ntz,
  INPATIENT_BEDS_OCCUPIED integer,
  INPATIENT_BEDS_LOWER_BOUND integer,
  INPATIENT_BEDS_UPPER_BOUND integer,
  INPATIENT_BEDS_IN_USE_PCT float,
  INPATIENT_BEDS_IN_USE_PCT_LOWER_BOUND float,
  INPATIENT_BEDS_IN_USE_PCT_UPPER_BOUND float,
  TOTAL_INPATIENT_BEDS integer,
  TOTAL_INPATIENT_BEDS_LOWER_BOUND integer,
  TOTAL_INPATIENT_BEDS_UPPER_BOUND integer,
  ISO3166_1 varchar(2),
  ISO3166_2 varchar(2),
  LAST_REPORTED_FLAG boolean
);



-- Column list mirrors the header of CDC_INPATIENT_BEDS_COVID_19.csv written by this notebook;
-- it uses the same column names as CDC_INPATIENT_BEDS_ALL.
CREATE TABLE  CDC_INPATIENT_BEDS_COVID_19 (
  STATE varchar,
  DATE timestamp_ntz,
  INPATIENT_BEDS_OCCUPIED integer,
  INPATIENT_BEDS_LOWER_BOUND integer,
  INPATIENT_BEDS_UPPER_BOUND integer,
  INPATIENT_BEDS_IN_USE_PCT float,
  INPATIENT_BEDS_IN_USE_PCT_LOWER_BOUND float,
  INPATIENT_BEDS_IN_USE_PCT_UPPER_BOUND float,
  TOTAL_INPATIENT_BEDS integer,
  TOTAL_INPATIENT_BEDS_LOWER_BOUND integer,
  TOTAL_INPATIENT_BEDS_UPPER_BOUND integer,
  ISO3166_1 varchar(2),
  ISO3166_2 varchar(2),
  LAST_REPORTED_FLAG boolean
);


-- Column list mirrors the header of CDC_INPATIENT_BEDS_ICU_ALL.csv written by this notebook.
CREATE TABLE CDC_INPATIENT_BEDS_ICU_ALL(
  STATE varchar,
  DATE timestamp_ntz,
  STAFFED_ADULT_ICU_BEDS_OCCUPIED integer,
  STAFFED_ADULT_ICU_BEDS_OCCUPIED_LOWER_BOUND integer,
  STAFFED_ADULT_ICU_BEDS_OCCUPIED_UPPER_BOUND integer,
  STAFFED_ADULT_ICU_BEDS_OCCUPIED_PCT float,
  STAFFED_ADULT_ICU_BEDS_OCCUPIED_PCT_LOWER_BOUND float,
  STAFFED_ADULT_ICU_BEDS_OCCUPIED_PCT_UPPER_BOUND float,
  TOTAL_STAFFED_ICU_BEDS integer,
  TOTAL_STAFFED_ICU_BEDS_LOWER_BOUND integer,
  TOTAL_STAFFED_ICU_BEDS_UPPER_BOUND integer,
  ISO3166_1 varchar(2),
  ISO3166_2 varchar(2),
  LAST_REPORTED_FLAG boolean
);
```

In [None]:
import os

# Write each prepared frame to its CSV file under `output_folder`.
# os.path.join is used instead of string concatenation so the path is still
# correct when `output_folder` is supplied without a trailing slash.
# All columns are written in frame order (the to_csv default) and the index
# is omitted.
for _file_name, _frame in (
    ("CDC_INPATIENT_BEDS_ALL.csv", inpatient_beds),
    ("CDC_INPATIENT_BEDS_COVID_19.csv", covid_19_inpatient_beds),
    ("CDC_INPATIENT_BEDS_ICU_ALL.csv", occupied_icu_beds),
):
    _frame.to_csv(os.path.join(output_folder, _file_name), index=False)