In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the per-county hospitalization CSV into a DataFrame.
# The bare name on the last line makes the notebook render the frame.
hosp_csv_path = "resources/covid19hospitalbycounty.csv"
hosp_df = pd.read_csv(filepath_or_buffer=hosp_csv_path)
hosp_df

Unnamed: 0,county,todays_date,hospitalized_covid_confirmed_patients,hospitalized_suspected_covid_patients,hospitalized_covid_patients,all_hospital_beds,icu_covid_confirmed_patients,icu_suspected_covid_patients,icu_available_beds
0,Contra Costa,2020-03-29,23.0,62.0,,,8.0,15.0,62.0
1,Imperial,2020-03-29,5.0,100.0,,,1.0,2.0,2.0
2,Glenn,2020-03-29,,,,,,,
3,Lake,2020-03-29,0.0,1.0,,,0.0,1.0,2.0
4,Santa Barbara,2020-03-29,15.0,21.0,,,9.0,3.0,47.0
...,...,...,...,...,...,...,...,...,...
36224,San Bernardino,2022-01-04,743.0,44.0,787.0,3676.0,162.0,8.0,88.0
36225,Santa Barbara,2022-01-04,64.0,0.0,64.0,614.0,9.0,0.0,34.0
36226,Madera,2022-01-04,23.0,9.0,32.0,278.0,6.0,1.0,2.0
36227,Lake,2022-01-04,3.0,0.0,3.0,64.0,1.0,0.0,2.0


In [3]:
# Count the distinct county names present in the data.
# The result shows only 56 of California's 58 counties are represented.
hosp_df['county'].nunique(dropna=False)

56

In [4]:
# Per-column count of missing (NaN) entries
hosp_df.isna().sum()

county                                      0
todays_date                                 0
hospitalized_covid_confirmed_patients       8
hospitalized_suspected_covid_patients       8
hospitalized_covid_patients              1285
all_hospital_beds                        1375
icu_covid_confirmed_patients               29
icu_suspected_covid_patients               29
icu_available_beds                        804
dtype: int64

In [5]:
# Substitute 0 for every missing value, in all columns.
# NOTE(review): a missing report becomes indistinguishable from a true zero,
# and these zeros feed the rolling averages computed later -- confirm this is intended.
hosp_df = hosp_df.fillna(value=0)

In [6]:
# Inspect the dtype pandas assigned to each column.
# NOTE(review): todays_date is reported as `object` (strings), not datetime64.
# The rows displayed use YYYY-MM-DD, for which string order matches date order;
# confirm every row follows that format before relying on a string sort.
hosp_df.dtypes

county                                    object
todays_date                               object
hospitalized_covid_confirmed_patients    float64
hospitalized_suspected_covid_patients    float64
hospitalized_covid_patients              float64
all_hospital_beds                        float64
icu_covid_confirmed_patients             float64
icu_suspected_covid_patients             float64
icu_available_beds                       float64
dtype: object

In [7]:
# Keep only the columns needed downstream and give them shorter names.
# The mapping's key order fixes the column order of the result.
column_renames = {
    'county': 'county',
    'todays_date': 'date',
    'hospitalized_covid_confirmed_patients': 'hospitalized_patients',
    'icu_available_beds': 'icu_beds',
}
clean_df = hosp_df[list(column_renames)].rename(columns=column_renames)

In [8]:
# Order the rows by date, oldest first
clean_df = clean_df.sort_values(by='date', ascending=True)

# Distinct county names, in order of first appearance in the sorted frame;
# iterated over later when computing per-county averages
counties = clean_df['county'].drop_duplicates().tolist()

In [9]:
# Set up an empty frame that will hold the rolling-average results:
# the four cleaned columns followed by the two computed average columns.
base_columns = ['county', 'date', 'hospitalized_patients', 'icu_beds']
average_columns = ['hosp_average', 'icu_average']
column_names = base_columns + average_columns

final_df = pd.DataFrame(columns=column_names)
final_df

Unnamed: 0,county,date,hospitalized_patients,icu_beds,hosp_average,icu_average


In [10]:
# For every county, compute rolling means over its rows and gather the
# per-county results into final_df:
#   hosp_average -- 14-row rolling mean of hospitalized_patients
#   icu_average  --  7-row rolling mean of icu_beds
# clean_df is sorted by date before this cell, so each county's rows are in
# date order. The windows count rows, not calendar days -- this assumes one
# row per county per day (TODO confirm there are no gaps or duplicates).

county_frames = []
for county in counties:

    # filter df for the given county and restart its index at 0;
    # reset_index returns a new frame, so adding columns below is safe
    df = clean_df.loc[clean_df['county'] == county].reset_index(drop=True)

    # 14-row moving average; rolling() needs a full window by default,
    # so the first 13 rows of each county come out as NaN
    df['hosp_average'] = df.hospitalized_patients.rolling(window=14).mean()
    # same operation for a 7-row moving window (first 6 rows are NaN)
    df['icu_average'] = df.icu_beds.rolling(window=7).mean()

    county_frames.append(df)

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# re-copied the accumulated frame on every iteration. Concatenate once instead.
# Each county keeps its own 0..n-1 index, exactly as append() produced.
# With no counties, final_df is left as it was before this cell.
if county_frames:
    final_df = pd.concat(county_frames)

# the NaN averages from incomplete leading windows become 0
final_df = final_df.fillna(0)

final_df

Unnamed: 0,county,date,hospitalized_patients,icu_beds,hosp_average,icu_average
0,Contra Costa,2020-03-29,23.0,62.0,0.0,0.0
1,Contra Costa,2020-03-30,25.0,90.0,0.0,0.0
2,Contra Costa,2020-03-31,27.0,68.0,0.0,0.0
3,Contra Costa,2020-04-01,31.0,61.0,0.0,0.0
4,Contra Costa,2020-04-02,32.0,73.0,0.0,0.0
...,...,...,...,...,...,...
639,Mariposa,2021-12-31,0.0,0.0,0.0,0.0
640,Mariposa,2022-01-01,0.0,0.0,0.0,0.0
641,Mariposa,2022-01-02,0.0,0.0,0.0,0.0
642,Mariposa,2022-01-03,0.0,0.0,0.0,0.0


In [11]:
import json

In [12]:
# Convert the frame into a list with one {column: value} dict per row
list_d = final_df.to_dict('records')

In [13]:
# write to a text file for our data.js later
#with open("rolling_hosp_dict.txt", "w") as file:
    #file.write(json.dumps(list_d))

In [14]:
# Write the records to a JavaScript file that defines a `hosp_data` variable.
# The list is serialized with json.dumps rather than interpolated directly:
# interpolation emits the Python repr, which is not guaranteed to be valid
# JavaScript (e.g. `nan`, `None`, `True`/`False`), whereas JSON output is a
# valid JavaScript literal.
with open("static/js/hospital_data.js", "w") as file:
    file.write(f"var hosp_data = {json.dumps(list_d)};")