In [1]:
#Various libraries that are needed to work with the data

import plotly.express as px
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sodapy import Socrata

In [2]:
#data retreived from CDC https://data.cdc.gov/Case-Surveillance/United-States-COVID-19-Cases-and-Deaths-by-State-o/9mfq-cb36
#variants https://www.who.int/en/activities/tracking-SARS-CoV-2-variants/

client = Socrata("data.cdc.gov", None)

results = client.get("9mfq-cb36", limit = 100000)

covid = pd.DataFrame.from_records(results)



# Exploration and Cleaning

In [3]:
covid.head()

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,new_death,pnew_death,created_at,consent_cases,consent_deaths,conf_death,prob_death
0,2022-01-14T00:00:00.000,KS,621273,470516.0,150757.0,19414.0,6964,7162,21.0,4,2022-01-15T14:59:30.476,Agree,,,
1,2022-01-02T00:00:00.000,AS,11,,,0.0,0,0,0.0,0,2022-01-03T15:18:16.200,,,,
2,2022-01-30T00:00:00.000,CO,1240361,1133083.0,107278.0,0.0,0,11061,0.0,0,2022-01-31T16:55:36.159,Agree,Agree,9754.0,1307.0
3,2020-07-09T00:00:00.000,CO,36093,32964.0,3129.0,410.0,12,1706,2.0,0,2020-07-09T00:00:00.000,Agree,Agree,1379.0,327.0
4,2022-01-26T00:00:00.000,CO,1222893,1117524.0,105369.0,6962.0,1247,10953,20.0,4,2022-01-27T14:52:03.278,Agree,Agree,9666.0,1287.0


In [4]:
len(covid)

47220

In [5]:
covid.isnull().sum()

submission_date        0
state                  0
tot_cases              0
conf_cases         21362
prob_cases         21434
new_case               0
pnew_case           3784
tot_death              0
new_death              0
pnew_death          3799
created_at             0
consent_cases       3153
consent_deaths      3935
conf_death         21742
prob_death         21742
dtype: int64

In [6]:
covid.dtypes

submission_date    object
state              object
tot_cases          object
conf_cases         object
prob_cases         object
new_case           object
pnew_case          object
tot_death          object
new_death          object
pnew_death         object
created_at         object
consent_cases      object
consent_deaths     object
conf_death         object
prob_death         object
dtype: object

In [7]:
covid.submission_date = pd.to_datetime(covid.submission_date, infer_datetime_format = True)
covid.tot_cases = pd.to_numeric(covid.tot_cases)
covid.conf_cases = pd.to_numeric(covid.conf_cases)
covid.prob_cases = pd.to_numeric(covid.prob_cases)
covid.new_case = pd.to_numeric(covid.new_case)
covid.pnew_case = pd.to_numeric(covid.pnew_case)
covid.tot_death = pd.to_numeric(covid.tot_death)
covid.new_death = pd.to_numeric(covid.new_death)
covid.pnew_death = pd.to_numeric(covid.pnew_death)
covid.created_at = pd.to_datetime(covid.created_at, infer_datetime_format = True)
covid.conf_death = pd.to_numeric(covid.conf_death)
covid.prob_death = pd.to_numeric(covid.prob_death)

In [8]:
covid.dtypes

submission_date    datetime64[ns]
state                      object
tot_cases                   int64
conf_cases                float64
prob_cases                float64
new_case                  float64
pnew_case                 float64
tot_death                   int64
new_death                 float64
pnew_death                float64
created_at         datetime64[ns]
consent_cases              object
consent_deaths             object
conf_death                float64
prob_death                float64
dtype: object

In [9]:
sorted_covid = covid.sort_values(by = "submission_date", ascending = False)

In [10]:
sorted_covid.head(-10)

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,new_death,pnew_death,created_at,consent_cases,consent_deaths,conf_death,prob_death
7592,2022-03-18,RMI,4,4.0,0.0,0.0,0.0,0,0.0,0.0,2022-03-19 13:30:21.744,Agree,Agree,0.0,0.0
43017,2022-03-18,MP,10689,10689.0,0.0,0.0,0.0,31,0.0,0.0,2022-03-19 13:30:21.744,Agree,Agree,31.0,0.0
43694,2022-03-18,TX,6616597,,,3130.0,1685.0,85399,79.0,0.0,2022-03-19 13:30:21.744,Not agree,Not agree,,
7989,2022-03-18,NY,2650390,,,1287.0,0.0,27169,8.0,0.0,2022-03-19 13:30:21.744,Not agree,Not agree,,
17904,2022-03-18,NE,477198,335647.0,141551.0,1.0,0.0,4047,0.0,0.0,2022-03-19 13:30:21.744,Agree,Agree,3306.0,741.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16004,2020-01-22,GU,0,,,0.0,,0,0.0,,2020-03-26 16:22:39.452,Not agree,Not agree,,
37010,2020-01-22,WY,0,,,0.0,,0,0.0,,2020-03-26 16:22:39.452,Agree,Agree,,
46693,2020-01-22,IA,0,,,0.0,,0,0.0,,2020-03-26 16:22:39.452,Not agree,Not agree,,
44278,2020-01-22,OH,0,,,0.0,,0,0.0,,2020-03-26 16:22:39.452,Agree,Agree,,


In [11]:
stateData = covid.groupby(["state"]).agg("max")
stateData.head()

  return f(*args, **kwargs)


Unnamed: 0_level_0,submission_date,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,new_death,pnew_death,created_at,consent_deaths,conf_death,prob_death
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AK,2022-03-18,236965,,,9956.0,0.0,1169,50.0,0.0,2022-03-19 13:30:21.744,,,
AL,2022-03-18,1291406,929663.0,361743.0,17107.0,6405.0,19093,96.0,31.0,2022-03-19 13:30:21.744,Agree,14667.0,4426.0
AR,2022-03-18,828889,,,14494.0,9154.0,11053,154.0,163.0,2022-03-19 13:30:21.744,Not agree,,
AS,2022-03-18,1188,,,287.0,0.0,0,0.0,0.0,2022-03-19 13:30:21.744,,,
AZ,2022-03-18,1992471,1762272.0,230199.0,27681.0,4295.0,28547,457.0,216.0,2022-03-19 13:30:21.744,Agree,24211.0,4336.0


In [12]:
#Creating variables to show how much each state contribute to the total deaths and cases

stateData["TotalDeathPercent"] = stateData["tot_cases"] / stateData["tot_cases"].sum()
stateData["TotalCasePercent"] = stateData["tot_death"]  / stateData["tot_death"].sum()

In [13]:
covid.set_index("submission_date", inplace = True)

In [14]:
covid.columns

Index(['state', 'tot_cases', 'conf_cases', 'prob_cases', 'new_case',
       'pnew_case', 'tot_death', 'new_death', 'pnew_death', 'created_at',
       'consent_cases', 'consent_deaths', 'conf_death', 'prob_death'],
      dtype='object')

In [15]:
grouped_covid = covid.groupby(["submission_date"]).agg("sum")

In [16]:
grouped_covid.dtypes

tot_cases       int64
conf_cases    float64
prob_cases    float64
new_case      float64
pnew_case     float64
tot_death       int64
new_death     float64
pnew_death    float64
conf_death    float64
prob_death    float64
dtype: object

In [17]:
grouped_covid["Mortality_Rate"] = ((grouped_covid["new_death"] / grouped_covid["new_case"]))

In [18]:
#Calculating Totals for the pandemic so far and the Mortality Rate

Total_Deaths = grouped_covid["new_death"].sum()
Total_Cases = grouped_covid["new_case"].sum()
Overall_Mortality_Rate = (Total_Deaths/Total_Cases) *100

print("Total Deaths: ", Total_Deaths)
print("Total Cases: ", Total_Cases)
print("Overall Mortality Rate: ", Overall_Mortality_Rate)

Total Deaths:  952296.0
Total Cases:  79080371.0
Overall Mortality Rate:  1.2042128633918523


In [19]:
#using the rolling function to create a rolling average on a 6 day basis

grouped_covid["cases7MA"] = grouped_covid["new_case"].rolling(window = 7).mean()
grouped_covid["death7MA"] = grouped_covid["new_death"].rolling(window = 7).mean()

In [20]:
#Resetting the Mortality rate to the Moving average to make it more accurate due to the lag in deaths compared
#comparted to cases

grouped_covid["Mortality_Rate"] = ((grouped_covid["death7MA"] / grouped_covid["cases7MA"]))

# Graphing and Mapping the Clean Data and Transformations

In [31]:
fig = make_subplots(rows=2, cols=1, shared_xaxes = True, vertical_spacing=0.02)


fig.add_trace(go.Scatter(x = grouped_covid.index, y = grouped_covid.new_case, name = "Cases", 
                         line=dict(color="crimson"), hovertemplate="%{y}<br>"),
             row = 1, col = 1)

fig.add_trace(go.Scatter(x = grouped_covid.index, y = grouped_covid.cases7MA, name = "7 day MA Cases",
                        line=dict(color="blue"), text = grouped_covid.Mortality_Rate,
                         hovertemplate="%{y}<br><br>" + "Daily Mortality Rate: %{text:.2%} <br>"),
             row = 1, col = 1)

fig.add_trace(go.Scatter(x = grouped_covid.index, y = grouped_covid.new_death, name = "Deaths",
                        line=dict(color="darkcyan"), hovertemplate="%{y}<br>"),
             row = 2, col = 1)

fig.add_trace(go.Scatter(x = grouped_covid.index, y = grouped_covid.death7MA, name = "7 day MA Deaths",
                        line=dict(color="darkblue"), text = grouped_covid.Mortality_Rate,
                        hovertemplate="%{y}<br><br>" + "Daily Mortality Rate: %{text:.2%} <br>"),
             row = 2, col = 1)

fig.update_xaxes(title = None)
fig.update_yaxes(title = None)

fig.update_layout(hovermode="x unified",
                 showlegend=True,
                 title = "Coronavirus Cases and Deaths")

#fig.update_traces(hovertemplate='Cases: %{grouped_covid.new_case} <br>Deaths: %{grouped_covid.new_death}') #

#extends the hoverline option to both subplots but removes the vertical rectangles
#fig.update_traces(xaxis='x1')

#Alpha variant of Coronavirus
fig.add_vrect(type="rect",
    x0="2020-01-22", x1="2020-05-01",
    line=dict(
        color="RoyalBlue",
        width=1,
    ),
    fillcolor="LightSkyBlue", 
    opacity = 0.5,
    layer = "below",
    annotation_text="Alpha",
    annotation_position="top left"
)

#Beta variant of Coronavirus
fig.add_vrect(type="rect",
    x0="2020-05-01", x1="2020-11-01",
    line=dict(
        color="Red",
        width=1,
    ),
    fillcolor="Pink", 
    opacity = 0.5,
    layer = "below",
    annotation_text="Beta",
    annotation_position="top left"
)

#Delta variant of Coronavirus
fig.add_vrect(type="rect",
    x0="2020-11-01", x1="2021-11-01",
    line=dict(
        color="RoyalBlue",
        width=1,
    ),
    fillcolor="LightSkyBlue", 
    opacity = 0.5,
    layer = "below",
    annotation_text="Delta",
    annotation_position="top left"
)

#Omicron variant of Coronavirus
x1 = str(grouped_covid.index.max())

fig.add_vrect(type="rect",
    x0="2021-11-01", x1=x1,
    line=dict(
        color="red",
        width=1,
    ),
    fillcolor="Pink", 
    opacity = 0.5,
    layer = "below",
    annotation_text="Omicron",
    annotation_position="top left"
)

fig.show()

print("Centers for Disease Control and Prevention, COVID-19 Response. \nCOVID-19 Case Surveillance Public Data Access, Summary, and Limitations")

Centers for Disease Control and Prevention, COVID-19 Response. 
COVID-19 Case Surveillance Public Data Access, Summary, and Limitations


In [22]:
fig.write_html("CovidPlot.html")

In [23]:
map1 = px.choropleth(stateData, locations= stateData.index, locationmode="USA-states", color= "tot_cases", scope="usa",
                   title = "Total Cases", hover_data = [stateData.index, "tot_cases", "TotalCasePercent"])
map1.show()

In [24]:
map2 = px.choropleth(stateData, locations= stateData.index, locationmode="USA-states", color= stateData.tot_death, scope="usa",
                   title = "Total Deaths", hover_data = [stateData.index, "tot_death", "TotalDeathPercent"])
map2.show()

In [25]:
map2.write_html("chloroplethCovid.html")