In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the by-county hospitalization CSV into a DataFrame and display it.
# The path is relative, so it resolves against the notebook's working directory.
hosp_df = pd.read_csv("resources/covid19hospitalbycounty.csv")
hosp_df

Unnamed: 0,county,todays_date,hospitalized_covid_confirmed_patients,hospitalized_suspected_covid_patients,hospitalized_covid_patients,all_hospital_beds,icu_covid_confirmed_patients,icu_suspected_covid_patients,icu_available_beds
0,Calaveras,2020-03-29,,,,,,,
1,Trinity,2020-03-29,0.0,0.0,,,0.0,0.0,
2,Marin,2020-03-29,7.0,13.0,,,2.0,6.0,11.0
3,Butte,2020-03-29,,,,,,,
4,Solano,2020-03-29,7.0,27.0,,,4.0,1.0,28.0
...,...,...,...,...,...,...,...,...,...
36280,Kings,2022-01-05,23.0,0.0,23.0,121.0,3.0,0.0,2.0
36281,Amador,2022-01-05,7.0,0.0,7.0,53.0,2.0,0.0,2.0
36282,Monterey,2022-01-05,57.0,0.0,57.0,716.0,11.0,0.0,14.0
36283,San Joaquin,2022-01-05,123.0,7.0,130.0,957.0,26.0,1.0,13.0


In [3]:
# Count the distinct county names in the data (a NaN, if present, would count
# as one value). The result shown below is 56, i.e. two of California's 58
# counties do not appear in this dataset.
hosp_df['county'].nunique(dropna=False)

56

In [4]:
# Per-column count of missing (NaN) entries.
hosp_df.isna().sum()

county                                      0
todays_date                                 0
hospitalized_covid_confirmed_patients       8
hospitalized_suspected_covid_patients       8
hospitalized_covid_patients              1285
all_hospital_beds                        1375
icu_covid_confirmed_patients               29
icu_suspected_covid_patients               29
icu_available_beds                        804
dtype: int64

In [5]:
# Substitute 0 for every missing entry, in every column.
# NOTE(review): this makes "not reported" indistinguishable from a reported
# zero in everything computed afterwards - confirm that is intended.
hosp_df = hosp_df.fillna(value=0)

In [6]:
# Inspect the dtype pandas inferred for each column.
# As shown below, todays_date comes back as object (it was not parsed into
# datetimes); all of the count columns are float64.
hosp_df.dtypes

county                                    object
todays_date                               object
hospitalized_covid_confirmed_patients    float64
hospitalized_suspected_covid_patients    float64
hospitalized_covid_patients              float64
all_hospital_beds                        float64
icu_covid_confirmed_patients             float64
icu_suspected_covid_patients             float64
icu_available_beds                       float64
dtype: object

In [7]:
# Keep only the columns needed downstream and give them shorter names.
# 'county' is kept under its original name; the mapping below lists the other
# selected columns (old name -> new name) in the order they should appear.
column_renames = {
    'todays_date': 'date',
    'hospitalized_covid_confirmed_patients': 'hospitalized_patients',
    'icu_available_beds': 'icu_beds',
}
selected_columns = ['county'] + list(column_renames)

clean_df = hosp_df[selected_columns].rename(columns=column_renames)

In [8]:
# Order the rows by the 'date' column, earliest first. The dates shown in the
# preview are YYYY-MM-DD strings, for which text order matches calendar order.
clean_df = clean_df.sort_values(by='date', ascending=True)

# Distinct county names, in order of first appearance in the sorted frame;
# this list drives the per-county calculations later on.
counties = clean_df['county'].drop_duplicates().tolist()

In [9]:
# Set up an empty frame with the final column layout: the four cleaned
# columns followed by the two rolling-average columns.
base_columns = ['county', 'date', 'hospitalized_patients', 'icu_beds']
average_columns = ['hosp_average', 'icu_average']
column_names = base_columns + average_columns

final_df = pd.DataFrame(columns=column_names)
final_df

Unnamed: 0,county,date,hospitalized_patients,icu_beds,hosp_average,icu_average


In [10]:
# Compute per-county rolling averages and stack the results into final_df.
#
# For each county (in the order given by `counties`), the matching rows of
# clean_df are taken in their existing order (clean_df was sorted by date in
# an earlier cell), re-indexed from 0, and given two extra columns:
#   hosp_average - rolling mean of hospitalized_patients over HOSP_WINDOW rows
#   icu_average  - rolling mean of icu_beds over ICU_WINDOW rows
# The windows count rows, not calendar days; this assumes one row per county
# per day - TODO confirm against the source data.
#
# Rows that do not yet have a full window get NaN from .rolling(), which is
# replaced by 0 at the end. A 0 average in a county's first rows therefore
# means "not enough history yet", not "zero patients/beds".
#
# final_df is rebuilt from scratch on every run: the per-county frames are
# collected in a list and concatenated once. This replaces the repeated
# DataFrame.append calls, which no longer exist in pandas 2.0+ and which
# duplicated every row if this cell was re-run on an already-filled final_df.

HOSP_WINDOW = 14  # rows per hospitalization average
ICU_WINDOW = 7    # rows per ICU-bed average

county_frames = []
for county in counties:
    # rows for this county only, with a fresh 0..n-1 index
    county_df = clean_df.loc[clean_df['county'] == county].reset_index(drop=True)

    county_df['hosp_average'] = (
        county_df['hospitalized_patients'].rolling(window=HOSP_WINDOW).mean()
    )
    county_df['icu_average'] = (
        county_df['icu_beds'].rolling(window=ICU_WINDOW).mean()
    )

    county_frames.append(county_df)

if county_frames:
    # each county keeps its own 0..n-1 index, as before
    final_df = pd.concat(county_frames)
else:
    # pd.concat raises on an empty list; fall back to an empty frame with the
    # same column layout
    final_df = pd.DataFrame(
        columns=list(clean_df.columns) + ['hosp_average', 'icu_average']
    )

# incomplete windows (NaN) are reported as 0
final_df = final_df.fillna(0)

final_df

Unnamed: 0,county,date,hospitalized_patients,icu_beds,hosp_average,icu_average
0,Calaveras,2020-03-29,0.0,0.0,0.0,0.0
1,Calaveras,2020-03-30,0.0,5.0,0.0,0.0
2,Calaveras,2020-03-31,0.0,4.0,0.0,0.0
3,Calaveras,2020-04-01,0.0,5.0,0.0,0.0
4,Calaveras,2020-04-02,0.0,5.0,0.0,0.0
...,...,...,...,...,...,...
640,Mariposa,2022-01-01,0.0,0.0,0.0,0.0
641,Mariposa,2022-01-02,0.0,0.0,0.0,0.0
642,Mariposa,2022-01-03,0.0,0.0,0.0,0.0
643,Mariposa,2022-01-04,0.0,0.0,0.0,0.0


In [11]:
import json

In [12]:
# Convert final_df into a list with one dict per row, keyed by column name
# (orient='records'); the DataFrame index is not included in the records.
list_d = final_df.to_dict(orient='records')

In [13]:
# write to a text file for our data.js later
#with open("rolling_hosp_dict.txt", "w") as file:
    #file.write(json.dumps(list_d))

In [14]:
# Write the records to a JavaScript file that defines the global `hosp_data`.
#
# The list is serialized with json.dumps rather than interpolated directly:
# interpolation emits Python's repr, which only happens to be valid JavaScript
# while every value is a plain string or number (None, True/False and nan
# would all produce a script that fails to load). JSON output is a valid
# JavaScript literal, so the resulting file is always loadable.
with open("static/js/hospital_data.js", "w", encoding="utf-8") as file:
    file.write(f"var hosp_data = {json.dumps(list_d)};")