In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objects as go

from IPython.display import display
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual

%matplotlib inline

In [3]:
# Folder holding the input data, given relative to the working directory.
data_folder = "data"
# CSV file with the state-level COVID-19 case and death counts over time.
cases_and_death_file = "United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv"

In [4]:
# Load the cases-and-deaths CSV from the data folder into a DataFrame.
df = pd.read_csv("/".join([data_folder, cases_and_death_file]))

In [5]:
# Preview the first rows to check the columns that were loaded.
df.head()

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,conf_death,prob_death,new_death,pnew_death,created_at,consent_cases,consent_deaths
0,03/11/2021,KS,297229,241035.0,56194.0,0,0.0,4851,,,0,0.0,03/12/2021 03:20:13 PM,Agree,
1,12/01/2021,ND,163565,135705.0,27860.0,589,220.0,1907,,,9,0.0,12/02/2021 02:35:20 PM,Agree,Not agree
2,04/07/2020,AS,0,,,0,,0,,,0,,04/07/2020 04:22:39 PM,,
3,04/08/2020,AR,1071,,,78,,18,,,0,,04/07/2020 04:22:39 PM,Not agree,Not agree
4,02/04/2020,AR,0,,,0,,0,,,0,,03/26/2020 04:22:39 PM,Not agree,Not agree


In [9]:
# Parse the submission date with an explicit month/day/year format (the raw
# values look like '03/11/2021'), so the result never depends on pandas'
# format inference and day/month can never be swapped.
df['submission_date'] = pd.to_datetime(df['submission_date'], format='%m/%d/%Y')

In [10]:
# Latest reporting date in the data. Series.max() is vectorised and skips
# missing timestamps, unlike the builtin max() which compares element by element.
df['submission_date'].max()
# last date updated: Sept 30, 2022

Timestamp('2022-09-30 00:00:00')

In [12]:
# Parse the record-creation timestamp with an explicit format (the raw values
# look like '03/12/2021 03:20:13 PM': month/day/year, 12-hour clock with AM/PM)
# instead of relying on pandas' format inference.
df['created_at'] = pd.to_datetime(df['created_at'], format='%m/%d/%Y %I:%M:%S %p')

In [13]:
# Most recent record-creation timestamp. Series.max() is vectorised and skips
# missing timestamps, unlike the builtin max().
df['created_at'].max()
# data for previous day updated the next day

Timestamp('2022-10-01 13:17:56')

# New Cases - Overall USA

In [14]:
# Keep only the reporting date and the daily new-case count.
df_new_cases = df.loc[:, ['submission_date', 'new_case']]

In [17]:
# Sum the new cases reported by all states for each date, ordered by date.
df_new_cases_grouped = (
    df_new_cases
    .groupby('submission_date')
    .sum()
    .reset_index()
    .sort_values('submission_date')
)

In [18]:
# Inspect the per-date totals of new cases.
df_new_cases_grouped

Unnamed: 0,submission_date,new_case
0,2020-01-22,4
1,2020-01-23,2
2,2020-01-24,1
3,2020-01-25,0
4,2020-01-26,1
...,...,...
978,2022-09-26,38190
979,2022-09-27,40163
980,2022-09-28,63713
981,2022-09-29,89855


In [50]:
# Line chart of the daily new-case totals.
# (px.area with the same arguments gives a filled-area variant.)
fig = px.line(
    df_new_cases_grouped,
    x='submission_date',
    y='new_case',
    title='Trends in Daily New Cases',
)

In [51]:
# Display the daily-new-cases line chart.
fig.show()

In [52]:
# Treat the x axis as dates and attach a visible range slider to it.
fig.update_layout(xaxis={'rangeslider': {'visible': True}, 'type': 'date'})

In [53]:
# Transparent plot and paper backgrounds; title centred near the top.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    title=dict(x=0.5, y=0.9, xanchor='center', yanchor='top'),
)
# Axis captions.
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Daily New Cases')

In [57]:
# 7 day moving average
df_new_cases_grouped['7-day-average'] = round(df_new_cases_grouped['new_case'].rolling(7).mean())

In [58]:
# Inspect the per-date totals together with the '7-day-average' column.
df_new_cases_grouped

Unnamed: 0,submission_date,new_case,7-day-average
0,2020-01-22,4,
1,2020-01-23,2,
2,2020-01-24,1,
3,2020-01-25,0,
4,2020-01-26,1,
...,...,...,...
978,2022-09-26,38190,51397.0
979,2022-09-27,40163,50046.0
980,2022-09-28,63713,47117.0
981,2022-09-29,89855,45728.0


In [59]:
# Line chart of the 7-day moving average of new cases.
fig = px.line(
    df_new_cases_grouped,
    x='submission_date',
    y='7-day-average',
    title='Trends - 7 Day Average',
)

In [60]:
# Display the 7-day-average line chart.
fig.show()

In [77]:
# Combining

# Overlay the raw daily counts and their 7-day moving average in one figure.
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=df_new_cases_grouped['submission_date'],
        y=df_new_cases_grouped['new_case'],
        mode='lines',
        name='Daily New Cases',
    )
)

fig.add_trace(
    go.Scatter(
        x=df_new_cases_grouped['submission_date'],
        y=df_new_cases_grouped['7-day-average'],
        mode='lines',
        name='7-day Moving Average',
    )
)

In [81]:
# Date-typed x axis with a visible range slider.
fig.update_layout(xaxis={'rangeslider': {'visible': True}, 'type': 'date'})

# Figure title and axis captions.
fig.update_layout(
    title='Daily New Cases - USA',
    xaxis_title='Date',
    yaxis_title='New Cases',
)

# Transparent backgrounds; title centred near the top.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    title=dict(x=0.5, y=0.9, xanchor='center', yanchor='top'),
)


# New Cases - by State

In [102]:
# Full jurisdiction name -> abbreviation.
us_state_to_abbrev = {
    # The fifty states
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    # District and territories
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
    # Additional jurisdictions
    "Republic of Marshall Islands": "RMI",
    "New York City": "NYC",
    "Palau": "PW",
    "Federated States of Micronesia": "FSM",
}

# Reverse lookup: abbreviation -> full jurisdiction name.
abbrev_to_us_state = {abbrev: name for name, abbrev in us_state_to_abbrev.items()}

In [104]:
# Translate every state abbreviation to its full name in one vectorised
# lookup rather than one Python-level .loc read and write per row.
state_names = df['state'].map(abbrev_to_us_state)

# Series.map leaves NaN where a code has no dictionary entry; fail loudly with
# a KeyError (as a direct dictionary lookup would) instead of storing NaN.
missing_codes = df.loc[state_names.isna(), 'state'].unique()
if len(missing_codes) > 0:
    raise KeyError(f'no state name known for code(s): {list(missing_codes)}')

df['state_mapping'] = state_names

In [105]:
@widgets.interact(State=df['state_mapping'].sort_values().unique())
def plot_new_cases_by_state(State):
    """Plot daily new cases and their 7-day moving average for one state.

    Called by ``widgets.interact`` with the value chosen from the sorted
    unique entries of ``df['state_mapping']``. Builds a Plotly figure with
    two line traces and shows it.

    Parameters
    ----------
    State : str
        Value of ``df['state_mapping']`` selecting the rows to plot.
    """
    state_rows = df.loc[df['state_mapping'] == State, ['submission_date', 'new_case']]

    # Per-date totals for the selected state, ordered by date.
    daily = (
        state_rows
        .groupby('submission_date')
        .sum()
        .reset_index()
        .sort_values('submission_date')
    )
    # Rolling mean over a 7-row window, rounded to whole cases.
    daily['7-day-average'] = daily['new_case'].rolling(window=7).mean().round()

    # Raw daily counts and the moving average share one figure.
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=daily['submission_date'],
            y=daily['new_case'],
            mode='lines',
            name='Daily New Cases',
        )
    )
    fig.add_trace(
        go.Scatter(
            x=daily['submission_date'],
            y=daily['7-day-average'],
            mode='lines',
            name='7-day Moving Average',
        )
    )

    # Date-typed x axis with a visible range slider.
    fig.update_layout(xaxis={'rangeslider': {'visible': True}, 'type': 'date'})

    # Figure title and axis captions.
    fig.update_layout(
        title=f'Daily New Cases - {State}',
        xaxis_title='Date',
        yaxis_title='New Cases',
    )

    # Transparent backgrounds; title centred near the top.
    fig.update_layout(
        plot_bgcolor='rgba(0, 0, 0, 0)',
        paper_bgcolor='rgba(0, 0, 0, 0)',
        title=dict(x=0.5, y=0.9, xanchor='center', yanchor='top'),
    )
    fig.show()

interactive(children=(Dropdown(description='State', options=('Alabama', 'Alaska', 'American Samoa', 'Arizona',…