In [57]:
# Import necessary libraries
import datadotworld as dw
import pandas as pd
import plotly.express as px
import datetime as dt
import numpy as np
import requests
import json

In [58]:
# Load data from covidtracking.com API (documentation: https://covidtracking.com/api)
# The timeout keeps a stalled connection from hanging the kernel indefinitely, and
# raise_for_status() surfaces an HTTP error here instead of letting an error page
# fail later as a confusing JSON / DataFrame problem.
api_response = requests.get(url='https://covidtracking.com/api/v1/states/daily.json', timeout=30)
api_response.raise_for_status()

# Decode the response body as JSON.
# NOTE(review): presumably a list of per-state, per-day records — confirm against the API docs.
json_load = api_response.json()

df_corona = pd.DataFrame(json_load)

df_corona.head()

Unnamed: 0,date,state,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,...,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade
0,20200625,AK,816.0,98636.0,,14.0,,,,2.0,...,99452,0,0,74506565ff9f0d6621e232e1e62239d658786e81,0,0,0,0,0,
1,20200625,AL,33206.0,336252.0,,693.0,2612.0,,761.0,,...,369458,5,45,17d9d87e1df190897ce93d3fcbb382e6dac460e3,0,0,0,0,0,
2,20200625,AR,18062.0,259318.0,,284.0,1245.0,,,61.0,...,277380,0,31,651dd04526a06699eef6c83fc27f1850be5e46cb,0,0,0,0,0,
3,20200625,AS,0.0,696.0,,,,,,,...,696,0,0,191bf0110e14794659c0c2f75b655e51fc2b9fb7,0,0,0,0,0,
4,20200625,AZ,63030.0,401166.0,,2453.0,4406.0,611.0,,415.0,...,464196,27,93,1ac05d6977f2752d59425412bbb0a63897260ea0,0,0,0,0,0,


In [59]:
# Add and clean columns
# Parse the YYYYMMDD `date` values into real datetimes. `errors='ignore'` is
# intentionally not used: it is deprecated in pandas and silently hands back the
# unparsed input on failure, which would only show up later as broken date
# comparisons in the graph helpers. A malformed date now raises right here.
df_corona['Measurement Date'] = pd.to_datetime(df_corona['date'], format='%Y%m%d')

# Give the API columns used by the charts human-readable names.
# rename() ignores keys that are not present, so re-running this cell is harmless.
df_corona = df_corona.rename(columns={"positiveIncrease": "New Cases",
                                      "deathIncrease":"New Deaths",
                                      "hospitalizedIncrease":"New Hospitalizations",
                                      "hospitalizedCurrently":"Current Hospitalizations",
                                      "positive":"Cumulative Cases",
                                      "death":"Cumulative Deaths",
                                      "hospitalizedCumulative":"Cumulative Hospitalizations",
                                      "inIcuCurrently":"Number Of People In ICU Beds"})

df_corona.head()

Unnamed: 0,date,state,Cumulative Cases,negative,pending,Current Hospitalizations,Cumulative Hospitalizations,Number Of People In ICU Beds,inIcuCumulative,onVentilatorCurrently,...,New Deaths,New Hospitalizations,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade,Measurement Date
0,20200625,AK,816.0,98636.0,,14.0,,,,2.0,...,0,0,74506565ff9f0d6621e232e1e62239d658786e81,0,0,0,0,0,,2020-06-25
1,20200625,AL,33206.0,336252.0,,693.0,2612.0,,761.0,,...,5,45,17d9d87e1df190897ce93d3fcbb382e6dac460e3,0,0,0,0,0,,2020-06-25
2,20200625,AR,18062.0,259318.0,,284.0,1245.0,,,61.0,...,0,31,651dd04526a06699eef6c83fc27f1850be5e46cb,0,0,0,0,0,,2020-06-25
3,20200625,AS,0.0,696.0,,,,,,,...,0,0,191bf0110e14794659c0c2f75b655e51fc2b9fb7,0,0,0,0,0,,2020-06-25
4,20200625,AZ,63030.0,401166.0,,2453.0,4406.0,611.0,,415.0,...,27,93,1ac05d6977f2752d59425412bbb0a63897260ea0,0,0,0,0,0,,2020-06-25


In [60]:
# Create script wide variables

# Most recent date present in the loaded data.
latest_measurement_date = df_corona['Measurement Date'].max()
# NOTE(review): the two milestone dates below are hard-coded; confirm them against a source.
one_hundredth_case_date_united_states = dt.datetime.strptime('2020-03-04','%Y-%m-%d')
one_hundredth_case_date_worldwide = pd.to_datetime('2020-01-28')
# Take a single snapshot of "today" so both derived dates stay exactly one day
# apart even if this cell happens to run across midnight.
today_date = dt.date.today()
yesterday_date = today_date - dt.timedelta(days=1)
two_days_ago_date = today_date - dt.timedelta(days=2)
# NOTE(review): presumably an approximate US population figure — confirm source and year.
us_population = 327000000
# Two-letter code -> display name, kept in ascending code order.
# Rows are grouped by the first letter of the code.
us_states = {
    "AK": "Alaska", "AL": "Alabama", "AR": "Arkansas", "AS": "American Samoa", "AZ": "Arizona",
    "CA": "California", "CO": "Colorado", "CT": "Connecticut",
    "DC": "District of Columbia", "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia", "GU": "Guam",
    "HI": "Hawaii",
    "IA": "Iowa", "ID": "Idaho", "IL": "Illinois", "IN": "Indiana",
    "KS": "Kansas", "KY": "Kentucky",
    "LA": "Louisiana",
    "MA": "Massachusetts", "MD": "Maryland", "ME": "Maine", "MI": "Michigan", "MN": "Minnesota",
    "MO": "Missouri", "MP": "Northern Mariana Islands", "MS": "Mississippi", "MT": "Montana",
    "NA": "National", "NC": "North Carolina", "ND": "North Dakota", "NE": "Nebraska",
    "NH": "New Hampshire", "NJ": "New Jersey", "NM": "New Mexico", "NV": "Nevada", "NY": "New York",
    "OH": "Ohio", "OK": "Oklahoma", "OR": "Oregon",
    "PA": "Pennsylvania", "PR": "Puerto Rico",
    "RI": "Rhode Island",
    "SC": "South Carolina", "SD": "South Dakota",
    "TN": "Tennessee", "TX": "Texas",
    "UT": "Utah",
    "VA": "Virginia", "VI": "Virgin Islands", "VT": "Vermont",
    "WA": "Washington", "WI": "Wisconsin", "WV": "West Virginia", "WY": "Wyoming",
}

In [61]:
# Create function to create single line graph
def create_single_line_graph(measurement_date_start,measurement_date_end,x_axis,y_axis,title,data=None):
    """Build a line chart of ``y_axis`` summed per ``x_axis`` value inside a date window.

    Parameters
    ----------
    measurement_date_start, measurement_date_end
        Inclusive bounds compared against the 'Measurement Date' column.
    x_axis : str
        Column to group by; plotted on the x axis.
    y_axis : str
        Column that is summed within each group; plotted on the y axis.
    title : str
        Chart title.
    data : pandas.DataFrame, optional
        Frame to plot. Defaults to the notebook-level ``df_corona`` so existing
        calls keep working; pass a frame explicitly to avoid relying on that
        global state.

    Returns
    -------
    The figure object produced by ``px.line``.

    Raises
    ------
    KeyError
        If 'Measurement Date', ``x_axis`` or ``y_axis`` is not a column of the frame.
    """
    if data is None:
        # Resolved at call time so the function always sees the current frame.
        data = df_corona

    required_columns = ('Measurement Date', x_axis, y_axis)
    missing_columns = [name for name in required_columns if name not in data.columns]
    if missing_columns:
        raise KeyError(f"Column(s) not found in data: {missing_columns}")

    # between() is inclusive on both ends, matching the original >= / <= pair.
    in_window = data['Measurement Date'].between(measurement_date_start, measurement_date_end)

    tidy_data = data.loc[in_window, :].groupby([x_axis])[y_axis].sum().reset_index()

    figure = px.line(tidy_data,
                     title=title,
                     x=x_axis, y=y_axis)

    return figure

In [63]:
# Create function to create multiple dimensions line graph
def create_multiple_line_graph(measurement_date_start,measurement_date_end,x_axis,y_axis,color,title,data=None):
    """Build a multi-series line chart of ``y_axis`` summed per (``x_axis``, ``color``) pair.

    Parameters
    ----------
    measurement_date_start, measurement_date_end
        Inclusive bounds compared against the 'Measurement Date' column.
    x_axis : str
        Column plotted on the x axis; first grouping key.
    y_axis : str
        Column that is summed within each group; plotted on the y axis.
    color : str
        Column that splits the data into one line per distinct value; second
        grouping key.
    title : str
        Chart title.
    data : pandas.DataFrame, optional
        Frame to plot. Defaults to the notebook-level ``df_corona`` so existing
        calls keep working; pass a frame explicitly to avoid relying on that
        global state.

    Returns
    -------
    The figure object produced by ``px.line``.

    Raises
    ------
    KeyError
        If 'Measurement Date', ``x_axis``, ``y_axis`` or ``color`` is not a
        column of the frame.
    """
    if data is None:
        # Resolved at call time so the function always sees the current frame.
        data = df_corona

    required_columns = ('Measurement Date', x_axis, y_axis, color)
    missing_columns = [name for name in required_columns if name not in data.columns]
    if missing_columns:
        raise KeyError(f"Column(s) not found in data: {missing_columns}")

    # between() is inclusive on both ends, matching the original >= / <= pair.
    in_window = data['Measurement Date'].between(measurement_date_start, measurement_date_end)

    tidy_data = data.loc[in_window, :].groupby([x_axis, color])[y_axis].sum().reset_index()

    figure = px.line(tidy_data,
                     title=title,
                     color=color,
                     x=x_axis, y=y_axis)

    return figure

In [66]:
# Create line graph for US New Cases By Day
# Window: 15 March 2020 up to the moment this cell runs. The bounds are built
# directly as Timestamps; the previous errors='ignore' argument is deprecated in
# pandas, and format= had no effect on an already-constructed datetime.
graph_us_new_cases_by_day = create_single_line_graph(
    measurement_date_start=pd.Timestamp('2020-03-15'),
    measurement_date_end=pd.Timestamp.now(),
    x_axis='Measurement Date', y_axis='New Cases', title='US New Cases By Day')

graph_us_new_cases_by_day.show()

In [88]:
# Create line graph for US New Cases By Day By State
# Window: 15 March 2020 up to the moment this cell runs. The bounds are built
# directly as Timestamps; the previous errors='ignore' argument is deprecated in
# pandas, and format= had no effect on an already-constructed datetime.
graph_us_new_cases_by_day_by_state = create_multiple_line_graph(
    measurement_date_start=pd.Timestamp('2020-03-15'),
    measurement_date_end=pd.Timestamp.now(),
    x_axis='Measurement Date', y_axis='New Cases', color='state',
    title='US New Cases By Day By State')

graph_us_new_cases_by_day_by_state.show()

In [73]:
# Create line graph for people in hospital by day
# Window: 15 March 2020 up to the moment this cell runs. The bounds are built
# directly as Timestamps; the previous errors='ignore' argument is deprecated in
# pandas, and format= had no effect on an already-constructed datetime.
# The chart title's "Hospitalizations" spelling is corrected as well.
graph_us_hospitalizations_by_day = create_single_line_graph(
    measurement_date_start=pd.Timestamp('2020-03-15'),
    measurement_date_end=pd.Timestamp.now(),
    x_axis='Measurement Date', y_axis='Current Hospitalizations',
    title='US Current Hospitalizations For COVID-19 By Day')

graph_us_hospitalizations_by_day.show()

In [74]:
# Create line graph for US Current Hospitalizations By Day By State
# ICU Capacity By State: https://www.forbes.com/sites/niallmccarthy/2020/05/18/icu-bed-capacity-in-all-50-us-states-compared-infographic/#315fdc1c24dc
# Window: 15 March 2020 up to the moment this cell runs. The bounds are built
# directly as Timestamps; the previous errors='ignore' argument is deprecated in
# pandas, and format= had no effect on an already-constructed datetime.
# The variable keeps its original (misspelled) name because a later cell refers
# to it; only the user-facing chart title is corrected.
graph_us_hospitilizations_by_day_by_state = create_multiple_line_graph(
    measurement_date_start=pd.Timestamp('2020-03-15'),
    measurement_date_end=pd.Timestamp.now(),
    x_axis='Measurement Date', y_axis='Current Hospitalizations', color='state',
    title='US Hospitalizations For COVID-19 By Day By State')

graph_us_hospitilizations_by_day_by_state.show()

In [69]:
# Create line graph for US New Deaths By Day
# Window: 15 March 2020 up to the moment this cell runs. The bounds are built
# directly as Timestamps; the previous errors='ignore' argument is deprecated in
# pandas, and format= had no effect on an already-constructed datetime.
graph_us_new_deaths_by_day = create_single_line_graph(
    measurement_date_start=pd.Timestamp('2020-03-15'),
    measurement_date_end=pd.Timestamp.now(),
    x_axis='Measurement Date', y_axis='New Deaths', title='US New Deaths By Day')

graph_us_new_deaths_by_day.show()

In [84]:
# Create line graph for US New Deaths By Day By State
# Window: 15 March 2020 up to the moment this cell runs. The bounds are built
# directly as Timestamps; the previous errors='ignore' argument is deprecated in
# pandas, and format= had no effect on an already-constructed datetime.
graph_us_new_deaths_by_day_by_state = create_multiple_line_graph(
    measurement_date_start=pd.Timestamp('2020-03-15'),
    measurement_date_end=pd.Timestamp.now(),
    x_axis='Measurement Date', y_axis='New Deaths', color='state',
    title='US New Deaths By Day By State')

graph_us_new_deaths_by_day_by_state.show()

In [86]:
# Create line graph for US Cumulative Cases By Day
# Window: 15 March 2020 up to the moment this cell runs. The bounds are built
# directly as Timestamps; the previous errors='ignore' argument is deprecated in
# pandas, and format= had no effect on an already-constructed datetime.
graph_us_cumulative_cases_by_day = create_single_line_graph(
    measurement_date_start=pd.Timestamp('2020-03-15'),
    measurement_date_end=pd.Timestamp.now(),
    x_axis='Measurement Date', y_axis='Cumulative Cases', title='US Cumulative Cases By Day')

graph_us_cumulative_cases_by_day.show()

In [None]:
# Create line graph for US Cumulative Cases By Day By State
# Window: 15 March 2020 up to the moment this cell runs. The bounds are built
# directly as Timestamps; the previous errors='ignore' argument is deprecated in
# pandas, and format= had no effect on an already-constructed datetime.
graph_us_cumulative_cases_by_day_by_state = create_multiple_line_graph(
    measurement_date_start=pd.Timestamp('2020-03-15'),
    measurement_date_end=pd.Timestamp.now(),
    x_axis='Measurement Date', y_axis='Cumulative Cases', color='state',
    title='US Cumulative Cases By Day By State')

graph_us_cumulative_cases_by_day_by_state.show()

In [47]:
# Create line graph for US Cumulative Deaths By Day
# Window: 15 March 2020 up to the moment this cell runs. The bounds are built
# directly as Timestamps; the previous errors='ignore' argument is deprecated in
# pandas, and format= had no effect on an already-constructed datetime.
graph_us_cumulative_deaths_by_day = create_single_line_graph(
    measurement_date_start=pd.Timestamp('2020-03-15'),
    measurement_date_end=pd.Timestamp.now(),
    x_axis='Measurement Date', y_axis='Cumulative Deaths', title='US Cumulative Deaths By Day')

graph_us_cumulative_deaths_by_day.show()

In [78]:
# Create line graph for US Cumulative Deaths By Day By State
# Window: 15 March 2020 up to the moment this cell runs. The bounds are built
# directly as Timestamps; the previous errors='ignore' argument is deprecated in
# pandas, and format= had no effect on an already-constructed datetime.
graph_us_cumulative_deaths_by_day_by_state = create_multiple_line_graph(
    measurement_date_start=pd.Timestamp('2020-03-15'),
    measurement_date_end=pd.Timestamp.now(),
    x_axis='Measurement Date', y_axis='Cumulative Deaths', color='state',
    title='US Cumulative Deaths By Day By State')

graph_us_cumulative_deaths_by_day_by_state.show()

In [87]:
# Re-display every chart built in the cells above, in the same order:
# new cases, hospitalizations, new deaths, cumulative cases, cumulative deaths —
# each as a national total followed by its per-state breakdown.
for chart in (
    graph_us_new_cases_by_day,
    graph_us_new_cases_by_day_by_state,
    graph_us_hospitalizations_by_day,
    graph_us_hospitilizations_by_day_by_state,
    graph_us_new_deaths_by_day,
    graph_us_new_deaths_by_day_by_state,
    graph_us_cumulative_cases_by_day,
    graph_us_cumulative_cases_by_day_by_state,
    graph_us_cumulative_deaths_by_day,
    graph_us_cumulative_deaths_by_day_by_state,
):
    chart.show()