<a href="https://colab.research.google.com/github/spberry4/Covid_Analysis/blob/main/Covid%20Exploration%20and%20Plotting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install sodapy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sodapy
  Downloading sodapy-2.2.0-py2.py3-none-any.whl (15 kB)
Collecting requests>=2.28.1
  Downloading requests-2.28.2-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.8/62.8 KB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: requests, sodapy
  Attempting uninstall: requests
    Found existing installation: requests 2.27.1
    Uninstalling requests-2.27.1:
      Successfully uninstalled requests-2.27.1
Successfully installed requests-2.28.2 sodapy-2.2.0


In [4]:
#Various libraries that are needed to work with the data

import plotly.express as px
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sodapy import Socrata

In [5]:
#data retreived from CDC https://data.cdc.gov/Case-Surveillance/United-States-COVID-19-Cases-and-Deaths-by-State-o/9mfq-cb36
#variants https://www.who.int/en/activities/tracking-SARS-CoV-2-variants/

client = Socrata("data.cdc.gov", None)

results = client.get("9mfq-cb36", limit = 100000)

covid = pd.DataFrame.from_records(results)



# Exploration and Cleaning

In [6]:
covid.head()

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,new_death,pnew_death,created_at,consent_cases,consent_deaths,conf_death,prob_death
0,2021-03-11T00:00:00.000,KS,297229,241035.0,56194.0,0.0,0,4851,0.0,0,2021-03-12T15:20:13.190,Agree,,,
1,2021-12-01T00:00:00.000,ND,163565,135705.0,27860.0,589.0,220,1907,9.0,0,2021-12-02T14:35:20.922,Agree,Not agree,,
2,2022-01-02T00:00:00.000,AS,11,,,0.0,0,0,0.0,0,2022-01-03T15:18:16.200,,,,
3,2021-11-22T00:00:00.000,AL,841461,620483.0,220978.0,703.0,357,16377,7.0,3,2021-11-22T00:00:00.000,Agree,Agree,12727.0,3650.0
4,2022-05-30T00:00:00.000,AK,251425,,,0.0,0,1252,0.0,0,2022-05-31T13:20:20.883,,,,


In [7]:
len(covid)

60060

In [8]:
covid.isnull().sum()

submission_date        0
state                  0
tot_cases              0
conf_cases         26026
prob_cases         26098
new_case               0
pnew_case           3526
tot_death              0
new_death              0
pnew_death          3494
created_at             0
consent_cases       4009
consent_deaths      5005
conf_death         26787
prob_death         26787
dtype: int64

In [9]:
covid.dtypes

submission_date    object
state              object
tot_cases          object
conf_cases         object
prob_cases         object
new_case           object
pnew_case          object
tot_death          object
new_death          object
pnew_death         object
created_at         object
consent_cases      object
consent_deaths     object
conf_death         object
prob_death         object
dtype: object

In [10]:
covid.submission_date = pd.to_datetime(covid.submission_date, infer_datetime_format = True)
covid.tot_cases = pd.to_numeric(covid.tot_cases)
covid.conf_cases = pd.to_numeric(covid.conf_cases)
covid.prob_cases = pd.to_numeric(covid.prob_cases)
covid.new_case = pd.to_numeric(covid.new_case)
covid.pnew_case = pd.to_numeric(covid.pnew_case)
covid.tot_death = pd.to_numeric(covid.tot_death)
covid.new_death = pd.to_numeric(covid.new_death)
covid.pnew_death = pd.to_numeric(covid.pnew_death)
covid.created_at = pd.to_datetime(covid.created_at, infer_datetime_format = True)
covid.conf_death = pd.to_numeric(covid.conf_death)
covid.prob_death = pd.to_numeric(covid.prob_death)

In [11]:
covid.dtypes

submission_date    datetime64[ns]
state                      object
tot_cases                   int64
conf_cases                float64
prob_cases                float64
new_case                  float64
pnew_case                 float64
tot_death                   int64
new_death                 float64
pnew_death                float64
created_at         datetime64[ns]
consent_cases              object
consent_deaths             object
conf_death                float64
prob_death                float64
dtype: object

In [12]:
sorted_covid = covid.sort_values(by = "submission_date", ascending = False)

In [13]:
sorted_covid.head(-10)

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,new_death,pnew_death,created_at,consent_cases,consent_deaths,conf_death,prob_death
59910,2022-10-18,IN,1930022,,,0.0,0.0,24816,0.0,0.0,2022-10-19 13:15:24.690,Not agree,Agree,23745.0,1071.0
49839,2022-10-18,KS,883063,655917.0,227146.0,0.0,0.0,9590,0.0,0.0,2022-10-19 13:15:24.690,Agree,,,
16137,2022-10-18,VA,2106527,1490236.0,616291.0,942.0,434.0,22057,16.0,2.0,2022-10-19 13:15:24.690,Agree,Agree,18326.0,3731.0
58417,2022-10-18,GU,58703,,,0.0,0.0,401,0.0,0.0,2022-10-19 13:15:24.690,Not agree,Not agree,,
28617,2022-10-18,CA,11309237,10458792.0,850445.0,0.0,0.0,95604,0.0,0.0,2022-10-19 13:15:24.690,Agree,Not agree,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51294,2020-01-22,WV,0,,,0.0,,0,0.0,,2020-03-26 16:22:39.452,Agree,Not agree,,
28909,2020-01-22,WY,0,,,0.0,,0,0.0,,2020-03-26 16:22:39.452,Agree,Agree,,
17053,2020-01-22,DC,0,,,0.0,,0,0.0,,2020-03-26 16:22:39.452,,,,
34863,2020-01-22,VT,0,,,0.0,,0,0.0,,2020-03-26 16:22:39.452,Not agree,Not agree,,


In [14]:
stateData = covid.groupby(["state"]).agg("max")
stateData.head()

  stateData = covid.groupby(["state"]).agg("max")


Unnamed: 0_level_0,submission_date,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,new_death,pnew_death,created_at,consent_deaths,conf_death,prob_death
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AK,2022-10-18,284076,,,9956.0,0.0,1356,50.0,0.0,2022-10-19 13:15:24.690,,,
AL,2022-10-18,1530803,1084554.0,446249.0,17086.0,6396.0,20526,96.0,31.0,2022-10-19 13:15:24.690,Agree,15685.0,4841.0
AR,2022-10-18,956424,,,14494.0,9154.0,12386,154.0,163.0,2022-10-19 13:15:24.690,Not agree,,
AS,2022-10-18,8251,,,1016.0,0.0,34,5.0,0.0,2022-10-19 13:15:24.690,,,
AZ,2022-10-18,2277635,1994096.0,283539.0,27681.0,4295.0,31455,457.0,216.0,2022-10-19 13:15:24.690,Agree,27408.0,4425.0


In [15]:
#Creating variables to show how much each state contribute to the total deaths and cases

stateData["TotalDeathPercent"] = stateData["tot_cases"] / stateData["tot_cases"].sum()
stateData["TotalCasePercent"] = stateData["tot_death"]  / stateData["tot_death"].sum()

In [16]:
covid.set_index("submission_date", inplace = True)

In [17]:
covid.columns

Index(['state', 'tot_cases', 'conf_cases', 'prob_cases', 'new_case',
       'pnew_case', 'tot_death', 'new_death', 'pnew_death', 'created_at',
       'consent_cases', 'consent_deaths', 'conf_death', 'prob_death'],
      dtype='object')

In [18]:
grouped_covid = covid.groupby(["submission_date"]).agg("sum")

In [19]:
grouped_covid.dtypes

tot_cases       int64
conf_cases    float64
prob_cases    float64
new_case      float64
pnew_case     float64
tot_death       int64
new_death     float64
pnew_death    float64
conf_death    float64
prob_death    float64
dtype: object

In [20]:
grouped_covid["Mortality_Rate"] = ((grouped_covid["new_death"] / grouped_covid["new_case"]))

In [21]:
#Calculating Totals for the pandemic so far and the Mortality Rate

Total_Deaths = grouped_covid["new_death"].sum()
Total_Cases = grouped_covid["new_case"].sum()
Overall_Mortality_Rate = (Total_Deaths/Total_Cases) *100

print("Total Deaths: ", Total_Deaths)
print("Total Cases: ", Total_Cases)
print("Overall Mortality Rate: ", Overall_Mortality_Rate)

Total Deaths:  1042962.0
Total Cases:  96180659.0
Overall Mortality Rate:  1.0843780972638168


In [22]:
#using the rolling function to create a rolling average on a 7 day basis

grouped_covid["cases7MA"] = grouped_covid["new_case"].rolling(window = 7).mean()
grouped_covid["death7MA"] = grouped_covid["new_death"].rolling(window = 7).mean()

In [23]:
#Resetting the Mortality rate to the Moving average to make it more accurate due to the lag in deaths compared
#comparted to cases

grouped_covid["Mortality_Rate"] = ((grouped_covid["death7MA"] / grouped_covid["cases7MA"]))

# Graphing and Mapping the Clean Data and Transformations

In [24]:
fig = make_subplots(rows=2, cols=1, shared_xaxes = True, vertical_spacing=0.02)


fig.add_trace(go.Scatter(x = grouped_covid.index, y = grouped_covid.new_case, name = "Cases", 
                         line=dict(color="crimson"), hovertemplate="%{y}<br>"),
             row = 1, col = 1)

fig.add_trace(go.Scatter(x = grouped_covid.index, y = grouped_covid.cases7MA, name = "7 day MA Cases",
                        line=dict(color="blue"), text = grouped_covid.Mortality_Rate,
                         hovertemplate="%{y}<br><br>" + "Daily Mortality Rate: %{text:.2%} <br>"),
             row = 1, col = 1)

fig.add_trace(go.Scatter(x = grouped_covid.index, y = grouped_covid.new_death, name = "Deaths",
                        line=dict(color="darkcyan"), hovertemplate="%{y}<br>"),
             row = 2, col = 1)

fig.add_trace(go.Scatter(x = grouped_covid.index, y = grouped_covid.death7MA, name = "7 day MA Deaths",
                        line=dict(color="darkblue"), text = grouped_covid.Mortality_Rate,
                        hovertemplate="%{y}<br><br>" + "Daily Mortality Rate: %{text:.2%} <br>"),
             row = 2, col = 1)

fig.update_xaxes(title = None)
fig.update_yaxes(title = None)

fig.update_layout(hovermode="x unified",
                 showlegend=True,
                 title = "Coronavirus Cases and Deaths")

#fig.update_traces(hovertemplate='Cases: %{grouped_covid.new_case} <br>Deaths: %{grouped_covid.new_death}') #

#extends the hoverline option to both subplots but removes the vertical rectangles
#fig.update_traces(xaxis='x1')

#Alpha variant of Coronavirus
fig.add_vrect(type="rect",
    x0="2020-01-22", x1="2020-05-01",
    line=dict(
        color="RoyalBlue",
        width=1,
    ),
    fillcolor="LightSkyBlue", 
    opacity = 0.5,
    layer = "below",
    annotation_text="Alpha",
    annotation_position="top left"
)

#Beta variant of Coronavirus
fig.add_vrect(type="rect",
    x0="2020-05-01", x1="2020-11-01",
    line=dict(
        color="Red",
        width=1,
    ),
    fillcolor="Pink", 
    opacity = 0.5,
    layer = "below",
    annotation_text="Beta",
    annotation_position="top left"
)

#Delta variant of Coronavirus
fig.add_vrect(type="rect",
    x0="2020-11-01", x1="2021-11-01",
    line=dict(
        color="RoyalBlue",
        width=1,
    ),
    fillcolor="LightSkyBlue", 
    opacity = 0.5,
    layer = "below",
    annotation_text="Delta",
    annotation_position="top left"
)

#Omicron variant of Coronavirus
x1 = str(grouped_covid.index.max())

fig.add_vrect(type="rect",
    x0="2021-11-01", x1=x1,
    line=dict(
        color="red",
        width=1,
    ),
    fillcolor="Pink", 
    opacity = 0.5,
    layer = "below",
    annotation_text="Omicron",
    annotation_position="top left"
)

fig.show()

print("Centers for Disease Control and Prevention, COVID-19 Response. \nCOVID-19 Case Surveillance Public Data Access, Summary, and Limitations")

Centers for Disease Control and Prevention, COVID-19 Response. 
COVID-19 Case Surveillance Public Data Access, Summary, and Limitations


In [25]:
fig.write_html("CovidPlot.html")

In [28]:
map1 = px.choropleth(stateData, locations= stateData.index, locationmode="USA-states", color= "tot_cases", scope="usa",
                   title = "Total Cases by State in the USA", hover_data = [stateData.index, "tot_cases", "TotalCasePercent"])
map1.show()

In [29]:
map2 = px.choropleth(stateData, locations= stateData.index, locationmode="USA-states", color= stateData.tot_death, scope="usa",
                   title = "Total Deaths by State in the USA", hover_data = [stateData.index, "tot_death", "TotalDeathPercent"])
map2.show()

In [None]:
map2.write_html("chloroplethCovid.html")