In [None]:
import pandas as pd
import plotly.graph_objects as go
import requests
import zipfile
import io
from pathlib import Path


In [None]:
#directory that receives every CSV this notebook writes
outdir = Path('output')

#create it if needed; exist_ok avoids the check-then-create race and makes
#the cell safe to re-run, parents=True covers a nested path if outdir changes
outdir.mkdir(parents=True, exist_ok=True)

In [None]:
#pull down the data
#a timeout keeps the cell from hanging forever, and raise_for_status stops us
#from trying to unzip an HTML error page when the download fails
r = requests.get('https://ihmecovid19storage.blob.core.windows.net/latest/ihme-covid19.zip',
                 timeout=60)
r.raise_for_status()

#get a pointer to the zipfile (held entirely in memory)
z = zipfile.ZipFile(io.BytesIO(r.content))

#find the name of the embedded csv file, something like '2020_04_09.04/Hospitalization_all_locs.csv'
#match on the file extension rather than on '.csv' appearing anywhere in the name
csvfiles = [name for name in z.namelist() if name.lower().endswith('.csv')]
if not csvfiles:
    #without this check the cell would fail later with a confusing NameError
    raise FileNotFoundError('no .csv file found in the downloaded IHME archive')

#if the archive holds several CSVs keep the last one, as the original loop did
csvfile = csvfiles[-1]

#if I don't want to keep the csv file, just do this
#ihme = pd.read_csv(z.open(csvfile))

#figure out what I want to name the output (use the directory name + .csv)
outfile = outdir / Path(csvfile.split('/')[0] + '.csv')

#extract the csv and write it to my file
outfile.write_bytes(z.read(csvfile))

#load the csv file into a pandas dataframe
ihme = pd.read_csv(outfile)

#filter to just Georgia
#NOTE(review): this is an exact match on location_name; if the file ever lists
#another location that is also called 'Georgia' the rows would be mixed - confirm
ga_ihme = ihme[ihme['location_name']=='Georgia']

#write the Georgia data to a CSV
GAoutfile = outdir / Path(csvfile.split('/')[0] + '.ga.csv')
ga_ihme.to_csv(GAoutfile)

#print a few lines
ga_ihme.head()

In [None]:
#population data for each region
pop_region = {'A':437225,'B':419825,'C':522432,
              'D':3472810,'E':540645,'F':814248,
              'G':467607,'H':277781,'I':424343,
              'J':775422,'K':401738,'L':240045,
              'M':218716,'N':1284647}

#total population over all regions; computed once instead of on every loop pass
total_pop = sum(pop_region.values())

#build one scaled copy of the state data per region
regional_frames = []
for region, population in pop_region.items():
    #start by copying the state data
    tempdata = ga_ihme.copy()

    #change name from 'Georgia' to 'Georgia-<region letter>'
    tempdata['location_name'] = tempdata['location_name'] + '-' + region

    #scale every column except the first three by this region's share of the
    #total population (assumes columns 3 onward are all numeric - TODO confirm)
    tempdata.iloc[:,3:] = population / total_pop * tempdata.iloc[:,3:]

    regional_frames.append(tempdata)

#combine the regional frames in one step; DataFrame.append was removed in
#pandas 2.0, and a single concat avoids re-copying the growing frame each pass
ga_ihme_by_region = pd.concat(regional_frames, ignore_index=True)

#write the Georgia-Region data to a CSV
GAoutfile = outdir / Path(csvfile.split('/')[0] + '.ga-region.csv')
ga_ihme_by_region.to_csv(GAoutfile)

#print a few lines
ga_ihme_by_region.head()

In [None]:
#plot the statewide all-bed projection: the mean plus its lower and upper bounds
fig = go.Figure()
for column in ('allbed_mean','allbed_lower','allbed_upper'):
    #one line per column, labelled with the column name
    fig.add_trace(go.Scatter(x=ga_ihme['date'],y=ga_ihme[column],name=column))
fig.show()

In [None]:
#plot the all-bed projection again, this time for a single region (A) as an example
regionA = ga_ihme_by_region[ga_ihme_by_region['location_name'] == 'Georgia-A']

fig = go.Figure()
#each pair is (column to plot, legend label for that line)
for column, label in (('allbed_mean','allbed_meanA'),
                      ('allbed_lower','allbed_lowerA'),
                      ('allbed_upper','allbed_upperA')):
    fig.add_trace(go.Scatter(x=regionA['date'],y=regionA[column],name=label))
fig.show()

In [None]:
#the peaks are computed for every column from the fourth one onward
state_values = ga_ihme.iloc[:,3:]

#statewide maximum of each column; the maxima need not fall on the same day
maxdata = pd.DataFrame(state_values.max(), columns=['State'])

#row label at which each column reaches its maximum
maxIDs = state_values.idxmax()

#add an empty Date column and place it ahead of State
maxdata['Date'] = ""
maxdata = maxdata[['Date','State']]

#for every field (allbed_mean, allbed_lower, etc.) record the date of its peak row
for label, peak_row in maxIDs.items():
    maxdata.loc[label,'Date'] = ga_ihme.loc[peak_row,'date']

#add one column of maxima per region, named after the region letter
for region in pop_region:
    in_region = ga_ihme_by_region['location_name'] == 'Georgia-' + region
    region_peaks = ga_ihme_by_region[in_region].iloc[:,3:].max()
    maxdata = maxdata.join(pd.DataFrame(region_peaks, columns=[region]))

#save the peak table next to the other outputs
peakoutfile = outdir / Path(csvfile.split('/')[0] + '.ga-peaks.csv')
maxdata.to_csv(peakoutfile)

#show the table
maxdata

In [None]:
#reported (actual) figures for Georgia from covidtracking.com
#this source serves a plain csv rather than a zip, so pandas can read the URL directly
actuals = (
    pd.read_csv("https://covidtracking.com/api/states/daily.csv?state=GA")
    #the date column arrives in YYYYMMDD form; parse it into pandas datetimes
    .assign(date=lambda frame: pd.to_datetime(frame['date'],format='%Y%m%d'))
    #drop the rows that have no cumulative hospitalization figure
    .dropna(subset=['hospitalizedCumulative'])
)

#show the first few lines
actuals.head()

In [None]:
#plot cumulative hospitalizations over time from the covidtracking data
hospitalized_trace = go.Scatter(x=actuals['date'],y=actuals['hospitalizedCumulative'])
fig = go.Figure(data=hospitalized_trace)
fig.show()