# COVID-19 (SARS-CoV-2) Exploratory Data Analysis in the United States (U.S.)
## Data gathered via "The COVID Tracking Project" Public API

The data was collected via the COVID Tracking Project API, found [here](https://covidtracking.com/data/api). This data is widely available and is considered high quality. The Atlantic runs The COVID Tracking Project and the associated API. 

### Import Statistical Packages

In [1]:
# Imports
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.offline import iplot
import plotly.graph_objects as go
import os

### Analysis at the Federal Level (U.S.)

In [12]:
# National (U.S.-wide) daily series from the COVID Tracking Project API.
url = 'https://api.covidtracking.com/v1/us/daily.json'

# Load the JSON and, in the same chain, replace the 'date' column with real
# datetimes: each value is cast to a string and parsed as YYYYMMDD.
df = pd.read_json(url).assign(
    date=lambda frame: pd.to_datetime(frame['date'].astype(str), format='%Y%m%d')
)
df.head()

Unnamed: 0,date,states,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,...,totalTestResults,lastModified,total,posNeg,deathIncrease,hospitalizedIncrease,negativeIncrease,positiveIncrease,totalTestResultsIncrease,hash
0,2020-12-15,56,16529187,174109390,9693.0,112816.0,622898.0,21897.0,33958.0,7702.0,...,222699096,2020-12-15T24:00:00Z,0,0,2918,4322,1290364,189783,1742813,d8a0bc579b6eb9504e3e6e91b4792c95edf2ad60
1,2020-12-14,56,16339404,172819026,11448.0,110549.0,618576.0,21456.0,33693.0,7706.0,...,220956283,2020-12-14T24:00:00Z,0,0,1358,3569,1690191,193384,2159460,5ae02d88852aacd9b1e24a2fd89112f3ce8752a0
2,2020-12-13,56,16146020,171128835,11382.0,109298.0,615007.0,21230.0,33494.0,7535.0,...,218796823,2020-12-13T24:00:00Z,0,0,1494,2314,1214194,187754,1762480,f83998eb4d2ed0f4f5ebc07564e086a0b91f1ae7
3,2020-12-12,56,15958266,169914641,11345.0,108461.0,612693.0,21198.0,33419.0,7515.0,...,217034343,2020-12-12T24:00:00Z,0,0,2494,3695,1163977,225196,1810768,4103a4cce26150e5a7896927e640c366a9f5c90e
4,2020-12-11,56,15733070,168750664,13421.0,108108.0,608998.0,21010.0,33237.0,7489.0,...,215223575,2020-12-11T24:00:00Z,0,0,2749,5444,1345259,234810,1990255,ad61d461f18171426c6be4761e8cc90be12c09d5


In [13]:
# Show every column name in the national frame so later cells can select fields by name.
df.columns

Index(['date', 'states', 'positive', 'negative', 'pending',
       'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently',
       'inIcuCumulative', 'onVentilatorCurrently', 'onVentilatorCumulative',
       'recovered', 'dateChecked', 'death', 'hospitalized', 'totalTestResults',
       'lastModified', 'total', 'posNeg', 'deathIncrease',
       'hospitalizedIncrease', 'negativeIncrease', 'positiveIncrease',
       'totalTestResultsIncrease', 'hash'],
      dtype='object')

In [18]:
# National cumulative positive cases plotted against date.
# The data is passed as bare Series, so the label keys are the keyword
# names 'x' and 'y' rather than the DataFrame column names.
fig_1 = px.scatter(
    x=df['date'],
    y=df['positive'],
    labels={'x': 'Date', 'y': 'Number of Positive Cases (M)'},
    title='Overall Positive Cases in the United States (U.S.) by Date',
)
fig_1.show()

# National day-over-day increase in total test results plotted against date.
# This assignment must be live code: fig_5 is displayed below and is also
# exported in the "Export Charts" cell, so leaving it commented out breaks
# both on a fresh kernel.
fig_5 = px.scatter(
    x=df['date'],
    y=df['totalTestResultsIncrease'],
    title='Total Increase in Daily Testing for COVID-19 (U.S.)',
    labels={'x': 'Date', 'y': 'Number of Tests Completed'},
)

fig_5.show()

### Analysis at the State Level (U.S.)

In [4]:
# Per-state daily series, served by the same api.covidtracking.com host
# as the national feed.
url_state = 'https://api.covidtracking.com/v1/states/daily.json'

# Read the JSON payload straight into a DataFrame.
df_state = pd.read_json(path_or_buf=url_state)

In [5]:
# Narrow the state frame to the six columns used from here on.
state_columns = ['date', 'state', 'positive', 'deathConfirmed', 'hospitalized', 'positiveIncrease']

# Take an explicit copy of the subset: assigning a column on the result of
# df_state[[...]] directly makes pandas emit SettingWithCopyWarning.
df_state = df_state[state_columns].copy()

# Parse 'date' as YYYYMMDD after casting each value to a string.
df_state['date'] = pd.to_datetime(df_state['date'].astype(str), format='%Y%m%d')
df_state.head(5)

Unnamed: 0,date,state,positive,deathConfirmed,hospitalized,positiveIncrease
0,2020-12-15,AK,40438.0,179.0,908.0,278
1,2020-12-15,AL,301533.0,3642.0,29259.0,3638
2,2020-12-15,AR,189198.0,2672.0,10096.0,1691
3,2020-12-15,AS,0.0,,,0
4,2020-12-15,AZ,424382.0,6835.0,31266.0,4134


In [6]:
# Cumulative positive cases over time, one colour per state.
# Label keys are the keyword names ('x', 'y') because the data is passed
# as bare Series rather than as column names of a data_frame.
fig_2 = px.scatter(
    x=df_state['date'],
    y=df_state['positive'],
    color=df_state['state'],
    labels={'x': 'Date', 'y': 'Number of Positive Cases (M)'},
    title='Overall Positive Cases by State in the United States (U.S.) by Date',
)
fig_2.show()

In [7]:
# Confirmed deaths over time, one colour per state.
# The y-axis label carries no "(M)" suffix: the per-state deathConfirmed
# values shown in the preview above are in the hundreds to thousands, so a
# millions unit hint would be misleading.
fig_3 = px.scatter(
    x=df_state['date'],
    y=df_state['deathConfirmed'],
    color=df_state['state'],
    title='Overall Deaths by State in the United States (U.S.) by Date',
    labels={'x': 'Date', 'y': 'Number of Deaths Confirmed'},
)
fig_3.show()

In [8]:
# Daily change in positive cases over time, one colour per state.
# Label keys are the keyword names ('x', 'y') because the data is passed
# as bare Series rather than as column names of a data_frame.
fig_4 = px.scatter(
    x=df_state['date'],
    y=df_state['positiveIncrease'],
    color=df_state['state'],
    labels={'x': 'Date', 'y': 'Number of Cases Increase per Day'},
    title='Count Change of Positive Cases by State per Day',
)
fig_4.show()

### Export Charts to JPEG for Use in Documentation

In [17]:
# Create the output directory if it is missing. exist_ok=True makes this a
# single call with no separate existence check, so re-running the cell is safe.
os.makedirs("images", exist_ok=True)

# Every figure built above, keyed by the file stem it is exported under.
figures_to_export = {
    'fig_1': fig_1,
    'fig_2': fig_2,
    'fig_3': fig_3,
    'fig_4': fig_4,
    'fig_5': fig_5,
}

# Write each chart to ./images/<stem>.jpeg.
for file_stem, figure in figures_to_export.items():
    figure.write_image(f'./images/{file_stem}.jpeg')