In [1]:
#dependencies

import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# load the per-state daily case file into a dataframe, then preview the first rows

csv_path = "data/state_daily_cases.csv"
us_df = pd.read_csv(csv_path)
us_df.head()

Unnamed: 0,date,state,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,...,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade
0,20200829,AK,6035.0,339660.0,,43.0,,,,7.0,...,345695,0,0,d14280fc719ed7f9365e0e046b0ac9ffd9a0b06b,0,0,0,0,0,
1,20200829,AL,123889.0,851929.0,,986.0,14267.0,,1459.0,,...,975818,45,0,668b17e7da32941aa4a95ef2775527e9e7d5d7a7,0,0,0,0,0,
2,20200829,AR,60378.0,646592.0,,407.0,4142.0,,,95.0,...,706970,16,0,19baf0f83cf44094d1194521bfebecbcdeaf6961,0,0,0,0,0,
3,20200829,AS,0.0,1514.0,,,,,,,...,1514,0,0,e063eb477b842be4d1532f74fc5371dd23ec577d,0,0,0,0,0,
4,20200829,AZ,201287.0,991089.0,,812.0,21433.0,262.0,,164.0,...,1192376,29,5,d6aca9f355470d3f2ed666030c62bc76e880d557,0,0,0,0,0,


In [3]:
# keep only the georgia rows and the columns used further down,
# then renumber the index from 0

ga_columns = ['date', 'state', 'positive', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCumulative']
is_georgia = us_df['state'] == 'GA'

ga_df = us_df.loc[is_georgia, ga_columns].reset_index(drop=True)

ga_df

Unnamed: 0,date,state,positive,hospitalizedCurrently,hospitalizedCumulative,inIcuCumulative
0,20200829,GA,267758.0,2582.0,24533.0,4479.0
1,20200828,GA,265372.0,2648.0,24335.0,4433.0
2,20200827,GA,263074.0,2129.0,24127.0,4397.0
3,20200826,GA,260590.0,2227.0,23939.0,4360.0
4,20200825,GA,258354.0,2260.0,23717.0,4322.0
...,...,...,...,...,...,...
174,20200308,GA,7.0,,,
175,20200307,GA,6.0,,,
176,20200306,GA,2.0,,,
177,20200305,GA,2.0,,,


In [4]:
# calculate days from start of pandemic in GA for every row, insert into new columns
#
# adds two columns to ga_df:
#   days_from_start - whole days between the row's date and the earliest date in ga_df
#   date_format     - the row's date parsed into a real datetime (used as the plot x axis)
#
# done column-wise instead of row-by-row with iterrows, so both columns get proper
# int / datetime dtypes rather than the object dtype left behind by a '' placeholder

# 'date' holds values such as 20200829 -> go through str so they parse as YYYYMMDD
parsed_dates = pd.to_datetime(ga_df['date'].astype(str), format='%Y%m%d')

# day 0 is the earliest record; taking the minimum gives the same answer as reading the
# last row of the newest -> oldest data, without relying on row order or column position
start_day = parsed_dates.min()

# assigned in this order so the column positions stay (days_from_start, date_format)
ga_df['days_from_start'] = (parsed_dates - start_day).dt.days
ga_df['date_format'] = parsed_dates

ga_df

Unnamed: 0,date,state,positive,hospitalizedCurrently,hospitalizedCumulative,inIcuCumulative,days_from_start,date_format
0,20200829,GA,267758.0,2582.0,24533.0,4479.0,178,2020-08-29 00:00:00
1,20200828,GA,265372.0,2648.0,24335.0,4433.0,177,2020-08-28 00:00:00
2,20200827,GA,263074.0,2129.0,24127.0,4397.0,176,2020-08-27 00:00:00
3,20200826,GA,260590.0,2227.0,23939.0,4360.0,175,2020-08-26 00:00:00
4,20200825,GA,258354.0,2260.0,23717.0,4322.0,174,2020-08-25 00:00:00
...,...,...,...,...,...,...,...,...
174,20200308,GA,7.0,,,,4,2020-03-08 00:00:00
175,20200307,GA,6.0,,,,3,2020-03-07 00:00:00
176,20200306,GA,2.0,,,,2,2020-03-06 00:00:00
177,20200305,GA,2.0,,,,1,2020-03-05 00:00:00


In [5]:
# calculate the daily increase in cumulative cases, insert into new column
#
# daily_increase = this row's 'positive' minus the next row's 'positive'.
# the rows run newest -> oldest, so the next row is the previous calendar day.

# shift(-1) lines every row up with the row below it; the last row has nothing below,
# so fill_value=0 makes its increase equal to its own case count. this replaces the
# bare `except:` that handled the last row and would also have hidden any other error
previous_total = ga_df['positive'].shift(-1, fill_value=0)

# assigning by column name (not position 8) keeps this correct if columns are added or moved
ga_df['daily_increase'] = ga_df['positive'] - previous_total

ga_df

Unnamed: 0,date,state,positive,hospitalizedCurrently,hospitalizedCumulative,inIcuCumulative,days_from_start,date_format,daily_increase
0,20200829,GA,267758.0,2582.0,24533.0,4479.0,178,2020-08-29 00:00:00,2386
1,20200828,GA,265372.0,2648.0,24335.0,4433.0,177,2020-08-28 00:00:00,2298
2,20200827,GA,263074.0,2129.0,24127.0,4397.0,176,2020-08-27 00:00:00,2484
3,20200826,GA,260590.0,2227.0,23939.0,4360.0,175,2020-08-26 00:00:00,2236
4,20200825,GA,258354.0,2260.0,23717.0,4322.0,174,2020-08-25 00:00:00,2101
...,...,...,...,...,...,...,...,...,...
174,20200308,GA,7.0,,,,4,2020-03-08 00:00:00,1
175,20200307,GA,6.0,,,,3,2020-03-07 00:00:00,4
176,20200306,GA,2.0,,,,2,2020-03-06 00:00:00,0
177,20200305,GA,2.0,,,,1,2020-03-05 00:00:00,0


In [121]:
# calculate the 7-day average of the daily increases, insert into new column
#
# for each row the average covers that row and the six rows below it. the rows run
# newest -> oldest, so that is the day itself plus the six days before it. near the
# bottom of the frame fewer than seven rows remain and the average uses what is left.

# coerce to a numeric dtype first: rolling() cannot aggregate an object-dtype column
increases = pd.to_numeric(ga_df['daily_increase'])

# reversed, a trailing window of 7 covers exactly rows x .. x+6 of the original order;
# min_periods=1 lets the window shrink at the oldest rows instead of producing NaN
trailing_avg = increases[::-1].rolling(window=7, min_periods=1).mean()

# assignment aligns on the index, so the values land back on their original rows
ga_df['7d_avg_increase'] = trailing_avg.round(1)

ga_df

Unnamed: 0,date,state,positive,hospitalizedCurrently,hospitalizedCumulative,inIcuCumulative,days_from_start,date_format,daily_increase,7d_avg_increase
0,20200829,GA,267758.0,2582.0,24533.0,4479.0,178,2020-08-29 00:00:00,2386,2219.4
1,20200828,GA,265372.0,2648.0,24335.0,4433.0,177,2020-08-28 00:00:00,2298,2248.9
2,20200827,GA,263074.0,2129.0,24127.0,4397.0,176,2020-08-27 00:00:00,2484,2333.3
3,20200826,GA,260590.0,2227.0,23939.0,4360.0,175,2020-08-26 00:00:00,2236,2372.6
4,20200825,GA,258354.0,2260.0,23717.0,4322.0,174,2020-08-25 00:00:00,2101,2382.4
...,...,...,...,...,...,...,...,...,...,...
174,20200308,GA,7.0,,,,4,2020-03-08 00:00:00,1,1.4
175,20200307,GA,6.0,,,,3,2020-03-07 00:00:00,4,1.5
176,20200306,GA,2.0,,,,2,2020-03-06 00:00:00,0,0.7
177,20200305,GA,2.0,,,,1,2020-03-05 00:00:00,0,1


In [154]:
# build the figure: daily case increases as bars, with the 7-day moving average
# drawn on top as a filled line

# x axis: no grid, dotted spike line across the plot
date_axis = {
    'title': 'date',
    'showgrid': False,
    'linecolor': "#BCCCDC",
    'showspikes': True,
    'spikethickness': 2,
    'spikedash': "dot",
    'spikecolor': "#999999",
    'spikemode': "across",
}

cases_axis = {
    'title': '# of cases',
    'linecolor': "#BCCCDC",
}

layout = go.Layout(
    title='daily increase of COVID cases in Georgia, US',
    plot_bgcolor='whitesmoke',
    xaxis=date_axis,
    yaxis=cases_axis,
    hovermode="x",
    hoverdistance=15,
    spikedistance=5000,
)

daily_bars = go.Bar(
    x=ga_df['date_format'],
    y=ga_df['daily_increase'],
    name='daily increase',
    marker={'color': '#303F9F'},
)

average_line = go.Scatter(
    x=ga_df['date_format'],
    y=ga_df['7d_avg_increase'],
    name='7-day moving average',
    line={'color': '#F57C00'},
    fill='tozeroy',
    fillcolor='rgba(245, 124, 0, 0.25)',
)

# bars first, then the average line, so the traces are added in the same order
fig = go.Figure(layout=layout)
for trace in (daily_bars, average_line):
    fig.add_trace(trace)

fig.show()

In [155]:
# overlay one vertical dash-dot line per notable date on the existing figure;
# hovering a line shows the formatted date and its description
#
# sources:
# https://gov.georgia.gov/document/2020-executive-order/03232001/download
# https://gov.georgia.gov/document/2020-proclamation/executive-order-04202001-handout/download
# https://gov.georgia.gov/document/2020-executive-order/06122001/download
# july 4th
# https://gov.georgia.gov/document/2020-executive-order/07152001/download
# https://gov.georgia.gov/document/2020-executive-order/08152001/download

dates = {
    '2020-04-10': '8 days after shelter-in-place order',
    '2020-05-02': '8 days after businesses are allowed to reopen',
    '2020-06-20': '8 days after kemp loosens restrictions on gatherings at<br>restaurants, theaters, summer camps, conventions, & concerts',
    '2020-07-12': '8 days after july 4th',
    '2020-07-23': '8 days after kemp overrules city mask mandates',
    '2020-08-15': 'kemp allows mask mandates',
}

# every marker line shares the same look
marker_style = dict(color='#B0BEC5', width=2, dash='dashdot')

for iso_day, description in dates.items():
    # e.g. '2020-04-10' -> 'Apr 10, 2020' for the hover label
    hover_date = dt.datetime.strptime(iso_day, '%Y-%m-%d').strftime('%b %d, %Y')

    # same x at both ends -> a vertical segment from y=0 up to y=5500
    date_marker = go.Scatter(
        x=[iso_day] * 2,
        y=[0, 5500],
        opacity=0.7,
        mode="lines",
        line=marker_style,
        hovertemplate=f"<b>{hover_date}:</b><br>{description}<extra></extra>",
        showlegend=False,
    )
    fig.add_trace(date_marker)


fig.show()