# Script to consolidate data from the [Covid Tracking Project](https://covidtracking.com/) and calculate simple ratios for display in Tableau

# Packages and data import

In [1]:
# packages
import requests, json
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# should we plot anything in this notebook, this will allow visual to display
%matplotlib inline

# show all columns
pd.set_option('display.max_columns', None)

In [2]:
# table with state names and abbreviations
states_abbr = pd.read_csv("../inputs/state_table.csv")
states_abbr.head()

Unnamed: 0,state,state_abbr
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ
3,Arkansas,AR
4,California,CA


### Load [US daily 4pm EST](https://covidtracking.com/data/us-daily) data

In [3]:
national_url = "https://covidtracking.com/api/v1/us/daily.json"

In [4]:
# request from url
national_raw = requests.get(national_url)

In [5]:
# convert request to json
national_json = national_raw.json()

In [6]:
# show first record in json file
national_json[:1]

[{'date': 20200618,
  'states': 56,
  'positive': 2177888,
  'negative': 23225610,
  'pending': 1903,
  'hospitalizedCurrently': 28185,
  'hospitalizedCumulative': 229212,
  'inIcuCurrently': 5458,
  'inIcuCumulative': 9736,
  'onVentilatorCurrently': 2520,
  'onVentilatorCumulative': 869,
  'recovered': 599115,
  'dateChecked': '2020-06-18T00:00:00Z',
  'death': 112172,
  'hospitalized': 229212,
  'lastModified': '2020-06-18T00:00:00Z',
  'total': 25405401,
  'totalTestResults': 25403498,
  'posNeg': 25403498,
  'deathIncrease': 695,
  'hospitalizedIncrease': 1118,
  'negativeIncrease': 438665,
  'positiveIncrease': 26956,
  'totalTestResultsIncrease': 465621,
  'hash': '41a07957a7fbc6a585f20d9e35ed0842f25fb496'}]

In [7]:
# convert json to dataframe
national = pd.DataFrame(national_json)

# show first five rows
national.head()

Unnamed: 0,date,states,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dateChecked,death,hospitalized,lastModified,total,totalTestResults,posNeg,deathIncrease,hospitalizedIncrease,negativeIncrease,positiveIncrease,totalTestResultsIncrease,hash
0,20200618,56,2177888,23225610.0,1903.0,28185.0,229212.0,5458.0,9736.0,2520.0,869.0,599115.0,2020-06-18T00:00:00Z,112172.0,229212.0,2020-06-18T00:00:00Z,25405401,25403498,25403498,695,1118,438665,26956,465621,41a07957a7fbc6a585f20d9e35ed0842f25fb496
1,20200617,56,2150932,22786945.0,1745.0,28311.0,228094.0,5605.0,9665.0,2588.0,857.0,592191.0,2020-06-17T00:00:00Z,111477.0,228094.0,2020-06-17T00:00:00Z,24939622,24937877,24937877,782,1168,464685,23885,488570,1490f02ca32e495de23f42c687453dec327388eb
2,20200616,56,2127047,22322260.0,1604.0,28028.0,226926.0,5564.0,9590.0,2594.0,845.0,583503.0,2020-06-16T00:00:00Z,110695.0,226926.0,2020-06-16T00:00:00Z,24450911,24449307,24449307,713,1356,441217,23498,464715,b76c38c5b2569c09d84b6559091245c2d92f6e3a
3,20200615,56,2103549,21881043.0,1619.0,27115.0,225570.0,5695.0,9516.0,2636.0,835.0,576334.0,2020-06-15T00:00:00Z,109982.0,225570.0,2020-06-15T00:00:00Z,23986211,23984592,23984592,375,509,430967,18521,449488,1d6b3f792d9ad6ab553ce0b8f358d96d70333629
4,20200614,56,2085028,21450076.0,1672.0,27247.0,225061.0,5743.0,9466.0,2716.0,834.0,561816.0,2020-06-14T00:00:00Z,109607.0,225061.0,2020-06-14T00:00:00Z,23536776,23535104,23535104,358,641,463596,21486,485082,168319cea57492b92c3e16221430b3b93c7cb866


### Load [states daily 4pm EST](https://docs.google.com/spreadsheets/u/2/d/e/2PACX-1vRwAqp96T9sYYq2-i7Tj0pvTf6XVHjDSMIKBdZHXiCGGdNC0ypEU9NbngS8mxea55JuCFuua1MUeOj5/pubhtml) data

In [8]:
states_url = "https://covidtracking.com/api/v1/states/daily.json"

In [9]:
# request from url
states_raw = requests.get(states_url)

In [10]:
# convert request to json
states_json = states_raw.json()

In [11]:
# show first record in json file
states_json[:1]

[{'date': 20200618,
  'state': 'AK',
  'positive': 708,
  'negative': 80477,
  'pending': None,
  'hospitalizedCurrently': 18,
  'hospitalizedCumulative': None,
  'inIcuCurrently': None,
  'inIcuCumulative': None,
  'onVentilatorCurrently': 0,
  'onVentilatorCumulative': None,
  'recovered': 449,
  'dataQualityGrade': 'A',
  'lastUpdateEt': '6/18/2020 00:00',
  'dateModified': '2020-06-18T00:00:00Z',
  'checkTimeEt': '06/17 20:00',
  'death': 12,
  'hospitalized': None,
  'dateChecked': '2020-06-18T00:00:00Z',
  'totalTestsViral': 81185,
  'positiveTestsViral': None,
  'negativeTestsViral': None,
  'positiveCasesViral': None,
  'fips': '02',
  'positiveIncrease': 12,
  'negativeIncrease': 3464,
  'total': 81185,
  'totalTestResults': 81185,
  'totalTestResultsIncrease': 3476,
  'posNeg': 81185,
  'deathIncrease': 0,
  'hospitalizedIncrease': 0,
  'hash': 'db312ffaaa17d8bf8435e7fa3f58db1ece2567ff',
  'commercialScore': 0,
  'negativeRegularScore': 0,
  'negativeScore': 0,
  'positiveSco

In [12]:
# convert json to dataframe
states = pd.DataFrame(states_json)

# show first five rows
states.head()

Unnamed: 0,date,state,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade
0,20200618,AK,708.0,80477.0,,18.0,,,,0.0,,449.0,A,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,12.0,,2020-06-18T00:00:00Z,81185.0,,,,2,12,3464,81185,81185,3476,81185,0,0,db312ffaaa17d8bf8435e7fa3f58db1ece2567ff,0,0,0,0,0,
1,20200618,AL,28206.0,294328.0,,663.0,2373.0,,695.0,,405.0,15974.0,B,6/18/2020 11:00,2020-06-18T11:00:00Z,06/18 07:00,810.0,2373.0,2020-06-18T11:00:00Z,,,,27796.0,1,894,10917,322534,322534,11811,322534,20,21,51542bad98c97fde34e7dc5472ef9416c6822a69,0,0,0,0,0,
2,20200618,AR,13928.0,214506.0,,226.0,1074.0,,,53.0,173.0,9376.0,B,6/18/2020 15:10,2020-06-18T15:10:00Z,06/18 11:10,208.0,1074.0,2020-06-18T15:10:00Z,,,,13928.0,5,322,7413,228434,228434,7735,228434,11,22,18689a56d425e2051c51c144e355569ce4786172,0,0,0,0,0,
3,20200618,AS,0.0,174.0,,,,,,,,,C,6/1/2020 00:00,2020-06-01T00:00:00Z,05/31 20:00,0.0,,2020-06-01T00:00:00Z,,,,,60,0,0,174,174,0,174,0,0,b514616f295b79fb24ef69691cc1f69261b6182a,0,0,0,0,0,
4,20200618,AZ,43443.0,336289.0,,1667.0,3956.0,540.0,,341.0,,6863.0,A+,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,1271.0,3956.0,2020-06-18T00:00:00Z,379374.0,,,43085.0,4,2519,11020,379732,379732,13539,379732,32,95,daaa2730525a7a7acc7d1498d3145f89c28d1ef1,0,0,0,0,0,


In [13]:
# data types by column
states.dtypes

date                          int64
state                        object
positive                    float64
negative                    float64
pending                     float64
hospitalizedCurrently       float64
hospitalizedCumulative      float64
inIcuCurrently              float64
inIcuCumulative             float64
onVentilatorCurrently       float64
onVentilatorCumulative      float64
recovered                   float64
dataQualityGrade             object
lastUpdateEt                 object
dateModified                 object
checkTimeEt                  object
death                       float64
hospitalized                float64
dateChecked                  object
totalTestsViral             float64
positiveTestsViral          float64
negativeTestsViral          float64
positiveCasesViral          float64
fips                         object
positiveIncrease              int64
negativeIncrease              int64
total                         int64
totalTestResults            

In [14]:
# set 'date' to date and 'dateChecked' to timestamp
states['date'] = pd.to_datetime(states['date'], format = "%Y%m%d").dt.strftime('%Y-%m-%d')
states['dateChecked'] = pd.to_datetime(states['dateChecked'])

# rename 'state' to 'state_abbr'
states.rename(columns = {'state': 'state_abbr'}, inplace = True)

# show first five rows
states.head()

Unnamed: 0,date,state_abbr,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade
0,2020-06-18,AK,708.0,80477.0,,18.0,,,,0.0,,449.0,A,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,12.0,,2020-06-18 00:00:00+00:00,81185.0,,,,2,12,3464,81185,81185,3476,81185,0,0,db312ffaaa17d8bf8435e7fa3f58db1ece2567ff,0,0,0,0,0,
1,2020-06-18,AL,28206.0,294328.0,,663.0,2373.0,,695.0,,405.0,15974.0,B,6/18/2020 11:00,2020-06-18T11:00:00Z,06/18 07:00,810.0,2373.0,2020-06-18 11:00:00+00:00,,,,27796.0,1,894,10917,322534,322534,11811,322534,20,21,51542bad98c97fde34e7dc5472ef9416c6822a69,0,0,0,0,0,
2,2020-06-18,AR,13928.0,214506.0,,226.0,1074.0,,,53.0,173.0,9376.0,B,6/18/2020 15:10,2020-06-18T15:10:00Z,06/18 11:10,208.0,1074.0,2020-06-18 15:10:00+00:00,,,,13928.0,5,322,7413,228434,228434,7735,228434,11,22,18689a56d425e2051c51c144e355569ce4786172,0,0,0,0,0,
3,2020-06-18,AS,0.0,174.0,,,,,,,,,C,6/1/2020 00:00,2020-06-01T00:00:00Z,05/31 20:00,0.0,,2020-06-01 00:00:00+00:00,,,,,60,0,0,174,174,0,174,0,0,b514616f295b79fb24ef69691cc1f69261b6182a,0,0,0,0,0,
4,2020-06-18,AZ,43443.0,336289.0,,1667.0,3956.0,540.0,,341.0,,6863.0,A+,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,1271.0,3956.0,2020-06-18 00:00:00+00:00,379374.0,,,43085.0,4,2519,11020,379732,379732,13539,379732,32,95,daaa2730525a7a7acc7d1498d3145f89c28d1ef1,0,0,0,0,0,


### Load state populations from Census American Community Survey (ACS5)

In [15]:
# # map url to var
# states_acs5_url = "https://api.census.gov/data/2017/acs/acs5?get=NAME,B01001_001E&for=state:*"

In [16]:
# # import data from url
# states_acs5_raw = requests.get(states_acs5_url)

In [17]:
# # data to json
# states_acs5_json = states_acs5_raw.json()

In [18]:
# # json to dataframe
# states_pop = pd.DataFrame(states_acs5_json[1:], columns = states_acs5_json[0]).rename(columns = 
#                                                         {"B01001_001E": "population",
#                                                          "NAME": "state",
#                                                          "state": "state_fips"})
# # show first five rows
# states_pop.head()

Making an additional join so we have state abbreviation - will be needed for join below

In [19]:
# # left join state abbr
# states_pop = pd.merge(states_pop, states_abbr, on = "state", how = "left")

# # rename 'state' to 'state_name'

# # show first five rows
# states_pop.head()

In [20]:
# # write states_pop to excel and use that moving forward (rather than running above cells)
# states_pop.to_excel('../outputs/State_Populations.xlsx', index = False)

<font color = 'red'> Once the above six cells of code have been run, and the state populations are saved to Excel, the line of code below can be used instead </font>

In [21]:
# read in excel file created above
states_pop = pd.read_excel('../outputs/State_Populations.xlsx')
states_pop.head()

Unnamed: 0,state,population,state_fips,state_abbr
0,Puerto Rico,3468963,72,PR
1,Alabama,4850771,1,AL
2,Alaska,738565,2,AK
3,Arizona,6809946,4,AZ
4,Arkansas,2977944,5,AR


In [22]:
states.head()

Unnamed: 0,date,state_abbr,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade
0,2020-06-18,AK,708.0,80477.0,,18.0,,,,0.0,,449.0,A,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,12.0,,2020-06-18 00:00:00+00:00,81185.0,,,,2,12,3464,81185,81185,3476,81185,0,0,db312ffaaa17d8bf8435e7fa3f58db1ece2567ff,0,0,0,0,0,
1,2020-06-18,AL,28206.0,294328.0,,663.0,2373.0,,695.0,,405.0,15974.0,B,6/18/2020 11:00,2020-06-18T11:00:00Z,06/18 07:00,810.0,2373.0,2020-06-18 11:00:00+00:00,,,,27796.0,1,894,10917,322534,322534,11811,322534,20,21,51542bad98c97fde34e7dc5472ef9416c6822a69,0,0,0,0,0,
2,2020-06-18,AR,13928.0,214506.0,,226.0,1074.0,,,53.0,173.0,9376.0,B,6/18/2020 15:10,2020-06-18T15:10:00Z,06/18 11:10,208.0,1074.0,2020-06-18 15:10:00+00:00,,,,13928.0,5,322,7413,228434,228434,7735,228434,11,22,18689a56d425e2051c51c144e355569ce4786172,0,0,0,0,0,
3,2020-06-18,AS,0.0,174.0,,,,,,,,,C,6/1/2020 00:00,2020-06-01T00:00:00Z,05/31 20:00,0.0,,2020-06-01 00:00:00+00:00,,,,,60,0,0,174,174,0,174,0,0,b514616f295b79fb24ef69691cc1f69261b6182a,0,0,0,0,0,
4,2020-06-18,AZ,43443.0,336289.0,,1667.0,3956.0,540.0,,341.0,,6863.0,A+,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,1271.0,3956.0,2020-06-18 00:00:00+00:00,379374.0,,,43085.0,4,2519,11020,379732,379732,13539,379732,32,95,daaa2730525a7a7acc7d1498d3145f89c28d1ef1,0,0,0,0,0,


### Load national from Census American Community Survey (ACS5)

In [23]:
# # map url to var
# us_acs5_url = "https://api.census.gov/data/2017/acs/acs5?get=NAME,B01001_001E&for=us:*"

# # import data from url
# us_acs5_raw = requests.get(us_acs5_url)

# # data to json
# us_acs5_json = us_acs5_raw.json()

# # json to dataframe
# us_pop = pd.DataFrame(us_acs5_json[1:], columns = us_acs5_json[0]).rename(columns = 
#                                                         {"B01001_001E": "population",
#                                                          "NAME": "country"}).drop(['us'], axis = 1)
# # show table
# us_pop

In [24]:
# # write us_pop to csv
# us_pop.to_csv('../outputs/US_Pop.csv', index = False)

<font color = 'red'> Once the US population is written to CSV, the above two cells of code can be commented out and replaced simply with the line of code below </font>

In [25]:
# read in us_pop
us_pop = pd.read_csv('../outputs/US_Pop.csv')
us_pop

Unnamed: 0,country,population
0,United States,321004407


# Join state population to state case table

In [26]:
# join the population to the state test/case table
combined_1 = pd.merge(states, states_pop[['state', 'state_abbr', 'population']]
                      , on = 'state_abbr', how = 'left')#.drop('state_abbr', 1)

# show first five rows
combined_1.head()

Unnamed: 0,date,state_abbr,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade,state,population
0,2020-06-18,AK,708.0,80477.0,,18.0,,,,0.0,,449.0,A,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,12.0,,2020-06-18 00:00:00+00:00,81185.0,,,,2,12,3464,81185,81185,3476,81185,0,0,db312ffaaa17d8bf8435e7fa3f58db1ece2567ff,0,0,0,0,0,,Alaska,738565.0
1,2020-06-18,AL,28206.0,294328.0,,663.0,2373.0,,695.0,,405.0,15974.0,B,6/18/2020 11:00,2020-06-18T11:00:00Z,06/18 07:00,810.0,2373.0,2020-06-18 11:00:00+00:00,,,,27796.0,1,894,10917,322534,322534,11811,322534,20,21,51542bad98c97fde34e7dc5472ef9416c6822a69,0,0,0,0,0,,Alabama,4850771.0
2,2020-06-18,AR,13928.0,214506.0,,226.0,1074.0,,,53.0,173.0,9376.0,B,6/18/2020 15:10,2020-06-18T15:10:00Z,06/18 11:10,208.0,1074.0,2020-06-18 15:10:00+00:00,,,,13928.0,5,322,7413,228434,228434,7735,228434,11,22,18689a56d425e2051c51c144e355569ce4786172,0,0,0,0,0,,Arkansas,2977944.0
3,2020-06-18,AS,0.0,174.0,,,,,,,,,C,6/1/2020 00:00,2020-06-01T00:00:00Z,05/31 20:00,0.0,,2020-06-01 00:00:00+00:00,,,,,60,0,0,174,174,0,174,0,0,b514616f295b79fb24ef69691cc1f69261b6182a,0,0,0,0,0,,,
4,2020-06-18,AZ,43443.0,336289.0,,1667.0,3956.0,540.0,,341.0,,6863.0,A+,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,1271.0,3956.0,2020-06-18 00:00:00+00:00,379374.0,,,43085.0,4,2519,11020,379732,379732,13539,379732,32,95,daaa2730525a7a7acc7d1498d3145f89c28d1ef1,0,0,0,0,0,,Arizona,6809946.0


# Create ratios

In [27]:
# by state, change in total from one day to the next
combined_1['tests_since_prev_day'] = combined_1.groupby('state')['total'].diff(-1)

# by state, change in positive from one day to the next
combined_1['positives_since_prev_day'] = combined_1.groupby('state')['positive'].diff(-1)

# by state, change in positive from one day to the next
combined_1['negatives_since_prev_day'] = combined_1.groupby('state')['negative'].diff(-1)

# show table
combined_1.head()

Unnamed: 0,date,state_abbr,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade,state,population,tests_since_prev_day,positives_since_prev_day,negatives_since_prev_day
0,2020-06-18,AK,708.0,80477.0,,18.0,,,,0.0,,449.0,A,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,12.0,,2020-06-18 00:00:00+00:00,81185.0,,,,2,12,3464,81185,81185,3476,81185,0,0,db312ffaaa17d8bf8435e7fa3f58db1ece2567ff,0,0,0,0,0,,Alaska,738565.0,3476.0,12.0,3464.0
1,2020-06-18,AL,28206.0,294328.0,,663.0,2373.0,,695.0,,405.0,15974.0,B,6/18/2020 11:00,2020-06-18T11:00:00Z,06/18 07:00,810.0,2373.0,2020-06-18 11:00:00+00:00,,,,27796.0,1,894,10917,322534,322534,11811,322534,20,21,51542bad98c97fde34e7dc5472ef9416c6822a69,0,0,0,0,0,,Alabama,4850771.0,11811.0,894.0,10917.0
2,2020-06-18,AR,13928.0,214506.0,,226.0,1074.0,,,53.0,173.0,9376.0,B,6/18/2020 15:10,2020-06-18T15:10:00Z,06/18 11:10,208.0,1074.0,2020-06-18 15:10:00+00:00,,,,13928.0,5,322,7413,228434,228434,7735,228434,11,22,18689a56d425e2051c51c144e355569ce4786172,0,0,0,0,0,,Arkansas,2977944.0,7735.0,322.0,7413.0
3,2020-06-18,AS,0.0,174.0,,,,,,,,,C,6/1/2020 00:00,2020-06-01T00:00:00Z,05/31 20:00,0.0,,2020-06-01 00:00:00+00:00,,,,,60,0,0,174,174,0,174,0,0,b514616f295b79fb24ef69691cc1f69261b6182a,0,0,0,0,0,,,,,,
4,2020-06-18,AZ,43443.0,336289.0,,1667.0,3956.0,540.0,,341.0,,6863.0,A+,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,1271.0,3956.0,2020-06-18 00:00:00+00:00,379374.0,,,43085.0,4,2519,11020,379732,379732,13539,379732,32,95,daaa2730525a7a7acc7d1498d3145f89c28d1ef1,0,0,0,0,0,,Arizona,6809946.0,13539.0,2519.0,11020.0


In [28]:
# look at NY state
combined_1[combined_1['state'] == "New York"].head(10)

Unnamed: 0,date,state_abbr,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade,state,population,tests_since_prev_day,positives_since_prev_day,negatives_since_prev_day
37,2020-06-18,NY,385760.0,2793900.0,,1358.0,89995.0,388.0,,278.0,,69243.0,A,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,24661.0,89995.0,2020-06-18 00:00:00+00:00,,,,385760.0,36,618,67923,3179660,3179660,68541,3179660,32,0,310b7a8b0acb1d880a8f8dff6f7bb8a7c456036b,0,0,0,0,0,,New York,19798228.0,68541.0,618.0,67923.0
93,2020-06-17,NY,385142.0,2725977.0,,1479.0,89995.0,431.0,,304.0,,69085.0,A,6/17/2020 00:00,2020-06-17T00:00:00Z,06/16 20:00,24629.0,89995.0,2020-06-17 00:00:00+00:00,,,,385142.0,36,567,58774,3111119,3111119,59341,3111119,21,0,3a6e13e08b23cad07516ed3f4a0701b3946aee59,0,0,0,0,0,,New York,19798228.0,59341.0,567.0,58774.0
149,2020-06-16,NY,384575.0,2667203.0,,1538.0,89995.0,449.0,,303.0,,68938.0,A,6/16/2020 00:00,2020-06-16T00:00:00Z,06/15 20:00,24608.0,89995.0,2020-06-16 00:00:00+00:00,,,,384575.0,36,631,59937,3051778,3051778,60568,3051778,29,0,7278f5d47d05271633bcff6507a1313ce68703a7,0,0,0,0,0,,New York,19798228.0,60568.0,631.0,59937.0
205,2020-06-15,NY,383944.0,2607266.0,,1608.0,89995.0,470.0,,323.0,,68851.0,A,6/15/2020 00:00,2020-06-15T00:00:00Z,06/14 20:00,24579.0,89995.0,2020-06-15 00:00:00+00:00,,,,383944.0,36,620,55991,2991210,2991210,56611,2991210,28,0,687804ec7fbe947b02eb239a57cc805aec4883c9,0,0,0,0,0,,New York,19798228.0,56611.0,620.0,55991.0
261,2020-06-14,NY,383324.0,2551275.0,,1657.0,89995.0,499.0,,346.0,,68761.0,A,6/13/2020 00:00,2020-06-13T00:00:00Z,06/12 20:00,24551.0,89995.0,2020-06-13 00:00:00+00:00,,,,383324.0,36,694,61665,2934599,2934599,62359,2934599,24,0,f807fc3c2646802e57ff5f1b55a02e67139c1bd2,0,0,0,0,0,,New York,19798228.0,62359.0,694.0,61665.0
317,2020-06-13,NY,382630.0,2489610.0,,1734.0,89995.0,517.0,,360.0,,68584.0,A,6/12/2020 00:00,2020-06-12T00:00:00Z,06/11 20:00,24527.0,89995.0,2020-06-12 00:00:00+00:00,,,,382630.0,36,916,69924,2872240,2872240,70840,2872240,32,0,fc7045a058c6900c23e63fea1169e0001b87fe29,0,0,0,0,0,,New York,19798228.0,70840.0,916.0,69924.0
373,2020-06-12,NY,381714.0,2419686.0,,1898.0,89995.0,552.0,,387.0,,68399.0,A,6/12/2020 00:00,2020-06-12T00:00:00Z,06/11 20:00,24495.0,89995.0,2020-06-12 00:00:00+00:00,,,,381714.0,36,822,71573,2801400,2801400,72395,2801400,53,0,b163ad0441c8a10074455029c10906cb5b434043,0,0,0,0,0,,New York,19798228.0,72395.0,822.0,71573.0
429,2020-06-11,NY,380892.0,2348113.0,,2042.0,89995.0,581.0,,424.0,,68211.0,A,6/11/2020 00:00,2020-06-11T00:00:00Z,06/10 20:00,24442.0,89995.0,2020-06-11 00:00:00+00:00,,,,380892.0,36,736,60103,2729005,2729005,60839,2729005,38,0,0fb149be1b73d602ac4593b54387e0547abe6206,0,0,0,0,0,,New York,19798228.0,60839.0,736.0,60103.0
485,2020-06-10,NY,380156.0,2288010.0,,2190.0,89995.0,630.0,,462.0,,68019.0,A,6/10/2020 00:00,2020-06-10T00:00:00Z,06/09 20:00,24404.0,89995.0,2020-06-10 00:00:00+00:00,,,,380156.0,36,674,61623,2668166,2668166,62297,2668166,56,0,722e71fad3e57cb807fbc0b220956ff9ffba50ca,0,0,0,0,0,,New York,19798228.0,62297.0,674.0,61623.0
541,2020-06-09,NY,379482.0,2226387.0,,2344.0,89995.0,663.0,,485.0,,67808.0,A,6/9/2020 00:00,2020-06-09T00:00:00Z,06/08 20:00,24348.0,89995.0,2020-06-09 00:00:00+00:00,,,,379482.0,36,683,49290,2605869,2605869,49973,2605869,49,0,8a5e0f38f81191c210d7ab38266b7e50755aa298,0,0,0,0,0,,New York,19798228.0,49973.0,683.0,49290.0


In [29]:
# show data types by column
combined_1.dtypes

date                                     object
state_abbr                               object
positive                                float64
negative                                float64
pending                                 float64
hospitalizedCurrently                   float64
hospitalizedCumulative                  float64
inIcuCurrently                          float64
inIcuCumulative                         float64
onVentilatorCurrently                   float64
onVentilatorCumulative                  float64
recovered                               float64
dataQualityGrade                         object
lastUpdateEt                             object
dateModified                             object
checkTimeEt                              object
death                                   float64
hospitalized                            float64
dateChecked                 datetime64[ns, UTC]
totalTestsViral                         float64
positiveTestsViral                      

In [30]:
# change population from object to numeric
combined_1['population'] = pd.to_numeric(combined_1['population'])

In [31]:
# tests per capita
combined_1['total_tests_per_person'] = combined_1['total'] / combined_1['population']

In [32]:
# show first five rows
combined_1.head()

Unnamed: 0,date,state_abbr,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade,state,population,tests_since_prev_day,positives_since_prev_day,negatives_since_prev_day,total_tests_per_person
0,2020-06-18,AK,708.0,80477.0,,18.0,,,,0.0,,449.0,A,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,12.0,,2020-06-18 00:00:00+00:00,81185.0,,,,2,12,3464,81185,81185,3476,81185,0,0,db312ffaaa17d8bf8435e7fa3f58db1ece2567ff,0,0,0,0,0,,Alaska,738565.0,3476.0,12.0,3464.0,0.109923
1,2020-06-18,AL,28206.0,294328.0,,663.0,2373.0,,695.0,,405.0,15974.0,B,6/18/2020 11:00,2020-06-18T11:00:00Z,06/18 07:00,810.0,2373.0,2020-06-18 11:00:00+00:00,,,,27796.0,1,894,10917,322534,322534,11811,322534,20,21,51542bad98c97fde34e7dc5472ef9416c6822a69,0,0,0,0,0,,Alabama,4850771.0,11811.0,894.0,10917.0,0.066491
2,2020-06-18,AR,13928.0,214506.0,,226.0,1074.0,,,53.0,173.0,9376.0,B,6/18/2020 15:10,2020-06-18T15:10:00Z,06/18 11:10,208.0,1074.0,2020-06-18 15:10:00+00:00,,,,13928.0,5,322,7413,228434,228434,7735,228434,11,22,18689a56d425e2051c51c144e355569ce4786172,0,0,0,0,0,,Arkansas,2977944.0,7735.0,322.0,7413.0,0.076709
3,2020-06-18,AS,0.0,174.0,,,,,,,,,C,6/1/2020 00:00,2020-06-01T00:00:00Z,05/31 20:00,0.0,,2020-06-01 00:00:00+00:00,,,,,60,0,0,174,174,0,174,0,0,b514616f295b79fb24ef69691cc1f69261b6182a,0,0,0,0,0,,,,,,,
4,2020-06-18,AZ,43443.0,336289.0,,1667.0,3956.0,540.0,,341.0,,6863.0,A+,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,1271.0,3956.0,2020-06-18 00:00:00+00:00,379374.0,,,43085.0,4,2519,11020,379732,379732,13539,379732,32,95,daaa2730525a7a7acc7d1498d3145f89c28d1ef1,0,0,0,0,0,,Arizona,6809946.0,13539.0,2519.0,11020.0,0.055761


In [33]:
# get today's date
today = datetime.today().strftime('%Y-%m-%d')
#today = '2020-04-25'

In [34]:
# filter date to today and sort states by highest test per capita
combined_1[combined_1['date'].isin([today])].sort_values(by = "total_tests_per_person", ascending = False).head()

Unnamed: 0,date,state_abbr,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade,state,population,tests_since_prev_day,positives_since_prev_day,negatives_since_prev_day,total_tests_per_person
43,2020-06-18,RI,16269.0,195324.0,,126.0,1911.0,23.0,,13.0,,1502.0,A+,6/18/2020 12:00,2020-06-18T12:00:00Z,06/18 08:00,885.0,1911.0,2020-06-18 12:00:00+00:00,,,,16269.0,44,56,5067,211593,211593,5123,211593,9,13,a0053153aae0d686d4e0dd2b807e31f40ebdf19b,0,0,0,0,0,,Rhode Island,1056138.0,5123.0,56.0,5067.0,0.200346
37,2020-06-18,NY,385760.0,2793900.0,,1358.0,89995.0,388.0,,278.0,,69243.0,A,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,24661.0,89995.0,2020-06-18 00:00:00+00:00,,,,385760.0,36,618,67923,3179660,3179660,68541,3179660,32,0,310b7a8b0acb1d880a8f8dff6f7bb8a7c456036b,0,0,0,0,0,,New York,19798228.0,68541.0,618.0,67923.0,0.160603
35,2020-06-18,NM,10065.0,265832.0,,161.0,1715.0,,,,,4351.0,C,6/17/2020 18:45,2020-06-17T18:45:00Z,06/17 14:45,452.0,1715.0,2020-06-17 18:45:00+00:00,,,,10065.0,35,132,4212,275897,275897,4344,275897,5,17,0ac15a5d685ac245483dda7cbee02e4d3e649cc2,0,0,0,0,0,,New Mexico,2084828.0,4344.0,132.0,4212.0,0.132336
34,2020-06-18,NJ,168107.0,1003627.0,,1258.0,19010.0,319.0,,257.0,,29101.0,A+,6/18/2020 13:00,2020-06-18T13:00:00Z,06/18 09:00,12800.0,19010.0,2020-06-18 13:00:00+00:00,,,,168107.0,34,404,23489,1171734,1171734,23893,1171734,31,0,6dcf04ad378ab1af177e7d138b834eee15057bdf,0,0,0,0,0,,New Jersey,8960161.0,23893.0,404.0,23489.0,0.130772
31,2020-06-18,ND,3193.0,87461.0,,26.0,208.0,,,,,2809.0,D,6/18/2020 13:00,2020-06-18T13:00:00Z,06/18 09:00,81.0,208.0,2020-06-18 13:00:00+00:00,144282.0,,,3193.0,38,27,953,90654,90654,980,90654,4,7,cd12bd0a7ab1bedbe559f5fd6d0cc7c9824d16d5,0,0,0,0,0,,North Dakota,745475.0,980.0,27.0,953.0,0.121606


In [35]:
# determine positive rate per tests
combined_1['positive_per_test'] = combined_1['positives_since_prev_day'] / combined_1['tests_since_prev_day']
combined_1.head()

Unnamed: 0,date,state_abbr,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade,state,population,tests_since_prev_day,positives_since_prev_day,negatives_since_prev_day,total_tests_per_person,positive_per_test
0,2020-06-18,AK,708.0,80477.0,,18.0,,,,0.0,,449.0,A,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,12.0,,2020-06-18 00:00:00+00:00,81185.0,,,,2,12,3464,81185,81185,3476,81185,0,0,db312ffaaa17d8bf8435e7fa3f58db1ece2567ff,0,0,0,0,0,,Alaska,738565.0,3476.0,12.0,3464.0,0.109923,0.003452
1,2020-06-18,AL,28206.0,294328.0,,663.0,2373.0,,695.0,,405.0,15974.0,B,6/18/2020 11:00,2020-06-18T11:00:00Z,06/18 07:00,810.0,2373.0,2020-06-18 11:00:00+00:00,,,,27796.0,1,894,10917,322534,322534,11811,322534,20,21,51542bad98c97fde34e7dc5472ef9416c6822a69,0,0,0,0,0,,Alabama,4850771.0,11811.0,894.0,10917.0,0.066491,0.075692
2,2020-06-18,AR,13928.0,214506.0,,226.0,1074.0,,,53.0,173.0,9376.0,B,6/18/2020 15:10,2020-06-18T15:10:00Z,06/18 11:10,208.0,1074.0,2020-06-18 15:10:00+00:00,,,,13928.0,5,322,7413,228434,228434,7735,228434,11,22,18689a56d425e2051c51c144e355569ce4786172,0,0,0,0,0,,Arkansas,2977944.0,7735.0,322.0,7413.0,0.076709,0.041629
3,2020-06-18,AS,0.0,174.0,,,,,,,,,C,6/1/2020 00:00,2020-06-01T00:00:00Z,05/31 20:00,0.0,,2020-06-01 00:00:00+00:00,,,,,60,0,0,174,174,0,174,0,0,b514616f295b79fb24ef69691cc1f69261b6182a,0,0,0,0,0,,,,,,,,
4,2020-06-18,AZ,43443.0,336289.0,,1667.0,3956.0,540.0,,341.0,,6863.0,A+,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,1271.0,3956.0,2020-06-18 00:00:00+00:00,379374.0,,,43085.0,4,2519,11020,379732,379732,13539,379732,32,95,daaa2730525a7a7acc7d1498d3145f89c28d1ef1,0,0,0,0,0,,Arizona,6809946.0,13539.0,2519.0,11020.0,0.055761,0.186055


# Display a few select states

In [36]:
# look at NY over time
combined_1[combined_1['state'] == 'New York'].head(10)

Unnamed: 0,date,state_abbr,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade,state,population,tests_since_prev_day,positives_since_prev_day,negatives_since_prev_day,total_tests_per_person,positive_per_test
37,2020-06-18,NY,385760.0,2793900.0,,1358.0,89995.0,388.0,,278.0,,69243.0,A,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,24661.0,89995.0,2020-06-18 00:00:00+00:00,,,,385760.0,36,618,67923,3179660,3179660,68541,3179660,32,0,310b7a8b0acb1d880a8f8dff6f7bb8a7c456036b,0,0,0,0,0,,New York,19798228.0,68541.0,618.0,67923.0,0.160603,0.009017
93,2020-06-17,NY,385142.0,2725977.0,,1479.0,89995.0,431.0,,304.0,,69085.0,A,6/17/2020 00:00,2020-06-17T00:00:00Z,06/16 20:00,24629.0,89995.0,2020-06-17 00:00:00+00:00,,,,385142.0,36,567,58774,3111119,3111119,59341,3111119,21,0,3a6e13e08b23cad07516ed3f4a0701b3946aee59,0,0,0,0,0,,New York,19798228.0,59341.0,567.0,58774.0,0.157141,0.009555
149,2020-06-16,NY,384575.0,2667203.0,,1538.0,89995.0,449.0,,303.0,,68938.0,A,6/16/2020 00:00,2020-06-16T00:00:00Z,06/15 20:00,24608.0,89995.0,2020-06-16 00:00:00+00:00,,,,384575.0,36,631,59937,3051778,3051778,60568,3051778,29,0,7278f5d47d05271633bcff6507a1313ce68703a7,0,0,0,0,0,,New York,19798228.0,60568.0,631.0,59937.0,0.154144,0.010418
205,2020-06-15,NY,383944.0,2607266.0,,1608.0,89995.0,470.0,,323.0,,68851.0,A,6/15/2020 00:00,2020-06-15T00:00:00Z,06/14 20:00,24579.0,89995.0,2020-06-15 00:00:00+00:00,,,,383944.0,36,620,55991,2991210,2991210,56611,2991210,28,0,687804ec7fbe947b02eb239a57cc805aec4883c9,0,0,0,0,0,,New York,19798228.0,56611.0,620.0,55991.0,0.151085,0.010952
261,2020-06-14,NY,383324.0,2551275.0,,1657.0,89995.0,499.0,,346.0,,68761.0,A,6/13/2020 00:00,2020-06-13T00:00:00Z,06/12 20:00,24551.0,89995.0,2020-06-13 00:00:00+00:00,,,,383324.0,36,694,61665,2934599,2934599,62359,2934599,24,0,f807fc3c2646802e57ff5f1b55a02e67139c1bd2,0,0,0,0,0,,New York,19798228.0,62359.0,694.0,61665.0,0.148225,0.011129
317,2020-06-13,NY,382630.0,2489610.0,,1734.0,89995.0,517.0,,360.0,,68584.0,A,6/12/2020 00:00,2020-06-12T00:00:00Z,06/11 20:00,24527.0,89995.0,2020-06-12 00:00:00+00:00,,,,382630.0,36,916,69924,2872240,2872240,70840,2872240,32,0,fc7045a058c6900c23e63fea1169e0001b87fe29,0,0,0,0,0,,New York,19798228.0,70840.0,916.0,69924.0,0.145076,0.012931
373,2020-06-12,NY,381714.0,2419686.0,,1898.0,89995.0,552.0,,387.0,,68399.0,A,6/12/2020 00:00,2020-06-12T00:00:00Z,06/11 20:00,24495.0,89995.0,2020-06-12 00:00:00+00:00,,,,381714.0,36,822,71573,2801400,2801400,72395,2801400,53,0,b163ad0441c8a10074455029c10906cb5b434043,0,0,0,0,0,,New York,19798228.0,72395.0,822.0,71573.0,0.141498,0.011354
429,2020-06-11,NY,380892.0,2348113.0,,2042.0,89995.0,581.0,,424.0,,68211.0,A,6/11/2020 00:00,2020-06-11T00:00:00Z,06/10 20:00,24442.0,89995.0,2020-06-11 00:00:00+00:00,,,,380892.0,36,736,60103,2729005,2729005,60839,2729005,38,0,0fb149be1b73d602ac4593b54387e0547abe6206,0,0,0,0,0,,New York,19798228.0,60839.0,736.0,60103.0,0.137841,0.012098
485,2020-06-10,NY,380156.0,2288010.0,,2190.0,89995.0,630.0,,462.0,,68019.0,A,6/10/2020 00:00,2020-06-10T00:00:00Z,06/09 20:00,24404.0,89995.0,2020-06-10 00:00:00+00:00,,,,380156.0,36,674,61623,2668166,2668166,62297,2668166,56,0,722e71fad3e57cb807fbc0b220956ff9ffba50ca,0,0,0,0,0,,New York,19798228.0,62297.0,674.0,61623.0,0.134768,0.010819
541,2020-06-09,NY,379482.0,2226387.0,,2344.0,89995.0,663.0,,485.0,,67808.0,A,6/9/2020 00:00,2020-06-09T00:00:00Z,06/08 20:00,24348.0,89995.0,2020-06-09 00:00:00+00:00,,,,379482.0,36,683,49290,2605869,2605869,49973,2605869,49,0,8a5e0f38f81191c210d7ab38266b7e50755aa298,0,0,0,0,0,,New York,19798228.0,49973.0,683.0,49290.0,0.131621,0.013667


In [37]:
# look at NY over time
#combined_1[combined_1['state'] == 'New Jersey']

In [38]:
# look at WA over time
#combined_1[combined_1['state'] == 'Washington']

In [39]:
# look at VA over time
#combined_1[combined_1['state'] == 'Virginia']

In [40]:
# look at MD over time
#combined_1[combined_1['state'] == 'Maryland']

In [41]:
# look at DC over time
#combined_1[combined_1['state'] == 'District of Columbia']

In [42]:
# look at LA over time
#combined_1[combined_1['state'] == 'Louisiana']

# Finalize US National Data

In [43]:
# dislay us pop
us_pop

Unnamed: 0,country,population
0,United States,321004407


In [44]:
# display first five rows of national dataset
national.head()

Unnamed: 0,date,states,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dateChecked,death,hospitalized,lastModified,total,totalTestResults,posNeg,deathIncrease,hospitalizedIncrease,negativeIncrease,positiveIncrease,totalTestResultsIncrease,hash
0,20200618,56,2177888,23225610.0,1903.0,28185.0,229212.0,5458.0,9736.0,2520.0,869.0,599115.0,2020-06-18T00:00:00Z,112172.0,229212.0,2020-06-18T00:00:00Z,25405401,25403498,25403498,695,1118,438665,26956,465621,41a07957a7fbc6a585f20d9e35ed0842f25fb496
1,20200617,56,2150932,22786945.0,1745.0,28311.0,228094.0,5605.0,9665.0,2588.0,857.0,592191.0,2020-06-17T00:00:00Z,111477.0,228094.0,2020-06-17T00:00:00Z,24939622,24937877,24937877,782,1168,464685,23885,488570,1490f02ca32e495de23f42c687453dec327388eb
2,20200616,56,2127047,22322260.0,1604.0,28028.0,226926.0,5564.0,9590.0,2594.0,845.0,583503.0,2020-06-16T00:00:00Z,110695.0,226926.0,2020-06-16T00:00:00Z,24450911,24449307,24449307,713,1356,441217,23498,464715,b76c38c5b2569c09d84b6559091245c2d92f6e3a
3,20200615,56,2103549,21881043.0,1619.0,27115.0,225570.0,5695.0,9516.0,2636.0,835.0,576334.0,2020-06-15T00:00:00Z,109982.0,225570.0,2020-06-15T00:00:00Z,23986211,23984592,23984592,375,509,430967,18521,449488,1d6b3f792d9ad6ab553ce0b8f358d96d70333629
4,20200614,56,2085028,21450076.0,1672.0,27247.0,225061.0,5743.0,9466.0,2716.0,834.0,561816.0,2020-06-14T00:00:00Z,109607.0,225061.0,2020-06-14T00:00:00Z,23536776,23535104,23535104,358,641,463596,21486,485082,168319cea57492b92c3e16221430b3b93c7cb866


In [45]:
# concat national dataset with the population
# code below duplicates us_pop by number of rows in national dataset to avoid NAs
national_final = pd.concat([national, 
                            pd.concat([us_pop.drop(['country'],
                                                   axis = 1)]*len(national), ignore_index = True)],
                           axis = 1)

# show first five rows
national_final.head()

Unnamed: 0,date,states,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dateChecked,death,hospitalized,lastModified,total,totalTestResults,posNeg,deathIncrease,hospitalizedIncrease,negativeIncrease,positiveIncrease,totalTestResultsIncrease,hash,population
0,20200618,56,2177888,23225610.0,1903.0,28185.0,229212.0,5458.0,9736.0,2520.0,869.0,599115.0,2020-06-18T00:00:00Z,112172.0,229212.0,2020-06-18T00:00:00Z,25405401,25403498,25403498,695,1118,438665,26956,465621,41a07957a7fbc6a585f20d9e35ed0842f25fb496,321004407
1,20200617,56,2150932,22786945.0,1745.0,28311.0,228094.0,5605.0,9665.0,2588.0,857.0,592191.0,2020-06-17T00:00:00Z,111477.0,228094.0,2020-06-17T00:00:00Z,24939622,24937877,24937877,782,1168,464685,23885,488570,1490f02ca32e495de23f42c687453dec327388eb,321004407
2,20200616,56,2127047,22322260.0,1604.0,28028.0,226926.0,5564.0,9590.0,2594.0,845.0,583503.0,2020-06-16T00:00:00Z,110695.0,226926.0,2020-06-16T00:00:00Z,24450911,24449307,24449307,713,1356,441217,23498,464715,b76c38c5b2569c09d84b6559091245c2d92f6e3a,321004407
3,20200615,56,2103549,21881043.0,1619.0,27115.0,225570.0,5695.0,9516.0,2636.0,835.0,576334.0,2020-06-15T00:00:00Z,109982.0,225570.0,2020-06-15T00:00:00Z,23986211,23984592,23984592,375,509,430967,18521,449488,1d6b3f792d9ad6ab553ce0b8f358d96d70333629,321004407
4,20200614,56,2085028,21450076.0,1672.0,27247.0,225061.0,5743.0,9466.0,2716.0,834.0,561816.0,2020-06-14T00:00:00Z,109607.0,225061.0,2020-06-14T00:00:00Z,23536776,23535104,23535104,358,641,463596,21486,485082,168319cea57492b92c3e16221430b3b93c7cb866,321004407


In [46]:
# check last five rows
national_final.tail()

Unnamed: 0,date,states,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dateChecked,death,hospitalized,lastModified,total,totalTestResults,posNeg,deathIncrease,hospitalizedIncrease,negativeIncrease,positiveIncrease,totalTestResultsIncrease,hash,population
144,20200126,1,1,,,,,,,,,,2020-01-26T00:00:00Z,,,2020-01-26T00:00:00Z,1,1,1,0,0,0,0,0,defee0e146ae049c86c14a5e580c6219add8e92f,321004407
145,20200125,1,1,,,,,,,,,,2020-01-25T00:00:00Z,,,2020-01-25T00:00:00Z,1,1,1,0,0,0,0,0,ca2013db2fa6a2b1a2c776b85d5fcd5e1ca64722,321004407
146,20200124,1,1,,,,,,,,,,2020-01-24T00:00:00Z,,,2020-01-24T00:00:00Z,1,1,1,0,0,0,0,0,738e6d4a52f6d0895318ad9eda0d965d09feeb70,321004407
147,20200123,1,1,,,,,,,,,,2020-01-23T00:00:00Z,,,2020-01-23T00:00:00Z,1,1,1,0,0,0,0,0,680d664781c7080c1171f160145dafc31e3b93af,321004407
148,20200122,1,1,,,,,,,,,,2020-01-22T00:00:00Z,,,2020-01-22T00:00:00Z,1,1,1,0,0,0,0,0,615132d06fd9e22bc21b1b3ec47fb9b2a0de0734,321004407


In [47]:
national_final[:1].stack().reset_index()

Unnamed: 0,level_0,level_1,0
0,0,date,20200618
1,0,states,56
2,0,positive,2177888
3,0,negative,2.32256e+07
4,0,pending,1903
5,0,hospitalizedCurrently,28185
6,0,hospitalizedCumulative,229212
7,0,inIcuCurrently,5458
8,0,inIcuCumulative,9736
9,0,onVentilatorCurrently,2520


# Write to Excel

In [48]:
# running code below since excel does not support datetimes with timezones
combined_1['dateChecked'] = combined_1['dateChecked'].dt.strftime('%Y-%m-%d %H:%M:%S')

# show first five rows
combined_1.head()

Unnamed: 0,date,state_abbr,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade,state,population,tests_since_prev_day,positives_since_prev_day,negatives_since_prev_day,total_tests_per_person,positive_per_test
0,2020-06-18,AK,708.0,80477.0,,18.0,,,,0.0,,449.0,A,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,12.0,,2020-06-18 00:00:00,81185.0,,,,2,12,3464,81185,81185,3476,81185,0,0,db312ffaaa17d8bf8435e7fa3f58db1ece2567ff,0,0,0,0,0,,Alaska,738565.0,3476.0,12.0,3464.0,0.109923,0.003452
1,2020-06-18,AL,28206.0,294328.0,,663.0,2373.0,,695.0,,405.0,15974.0,B,6/18/2020 11:00,2020-06-18T11:00:00Z,06/18 07:00,810.0,2373.0,2020-06-18 11:00:00,,,,27796.0,1,894,10917,322534,322534,11811,322534,20,21,51542bad98c97fde34e7dc5472ef9416c6822a69,0,0,0,0,0,,Alabama,4850771.0,11811.0,894.0,10917.0,0.066491,0.075692
2,2020-06-18,AR,13928.0,214506.0,,226.0,1074.0,,,53.0,173.0,9376.0,B,6/18/2020 15:10,2020-06-18T15:10:00Z,06/18 11:10,208.0,1074.0,2020-06-18 15:10:00,,,,13928.0,5,322,7413,228434,228434,7735,228434,11,22,18689a56d425e2051c51c144e355569ce4786172,0,0,0,0,0,,Arkansas,2977944.0,7735.0,322.0,7413.0,0.076709,0.041629
3,2020-06-18,AS,0.0,174.0,,,,,,,,,C,6/1/2020 00:00,2020-06-01T00:00:00Z,05/31 20:00,0.0,,2020-06-01 00:00:00,,,,,60,0,0,174,174,0,174,0,0,b514616f295b79fb24ef69691cc1f69261b6182a,0,0,0,0,0,,,,,,,,
4,2020-06-18,AZ,43443.0,336289.0,,1667.0,3956.0,540.0,,341.0,,6863.0,A+,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,1271.0,3956.0,2020-06-18 00:00:00,379374.0,,,43085.0,4,2519,11020,379732,379732,13539,379732,32,95,daaa2730525a7a7acc7d1498d3145f89c28d1ef1,0,0,0,0,0,,Arizona,6809946.0,13539.0,2519.0,11020.0,0.055761,0.186055


In [49]:
combined_1.head(20)

Unnamed: 0,date,state_abbr,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade,state,population,tests_since_prev_day,positives_since_prev_day,negatives_since_prev_day,total_tests_per_person,positive_per_test
0,2020-06-18,AK,708.0,80477.0,,18.0,,,,0.0,,449.0,A,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,12.0,,2020-06-18 00:00:00,81185.0,,,,2,12,3464,81185,81185,3476,81185,0,0,db312ffaaa17d8bf8435e7fa3f58db1ece2567ff,0,0,0,0,0,,Alaska,738565.0,3476.0,12.0,3464.0,0.109923,0.003452
1,2020-06-18,AL,28206.0,294328.0,,663.0,2373.0,,695.0,,405.0,15974.0,B,6/18/2020 11:00,2020-06-18T11:00:00Z,06/18 07:00,810.0,2373.0,2020-06-18 11:00:00,,,,27796.0,1,894,10917,322534,322534,11811,322534,20,21,51542bad98c97fde34e7dc5472ef9416c6822a69,0,0,0,0,0,,Alabama,4850771.0,11811.0,894.0,10917.0,0.066491,0.075692
2,2020-06-18,AR,13928.0,214506.0,,226.0,1074.0,,,53.0,173.0,9376.0,B,6/18/2020 15:10,2020-06-18T15:10:00Z,06/18 11:10,208.0,1074.0,2020-06-18 15:10:00,,,,13928.0,5,322,7413,228434,228434,7735,228434,11,22,18689a56d425e2051c51c144e355569ce4786172,0,0,0,0,0,,Arkansas,2977944.0,7735.0,322.0,7413.0,0.076709,0.041629
3,2020-06-18,AS,0.0,174.0,,,,,,,,,C,6/1/2020 00:00,2020-06-01T00:00:00Z,05/31 20:00,0.0,,2020-06-01 00:00:00,,,,,60,0,0,174,174,0,174,0,0,b514616f295b79fb24ef69691cc1f69261b6182a,0,0,0,0,0,,,,,,,,
4,2020-06-18,AZ,43443.0,336289.0,,1667.0,3956.0,540.0,,341.0,,6863.0,A+,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,1271.0,3956.0,2020-06-18 00:00:00,379374.0,,,43085.0,4,2519,11020,379732,379732,13539,379732,32,95,daaa2730525a7a7acc7d1498d3145f89c28d1ef1,0,0,0,0,0,,Arizona,6809946.0,13539.0,2519.0,11020.0,0.055761,0.186055
5,2020-06-18,CA,161099.0,2913431.0,,4584.0,,1298.0,,,,,B,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,5290.0,,2020-06-18 00:00:00,3074530.0,,,161099.0,6,4084,72458,3074530,3074530,76542,3074530,82,0,d468f264a4f7122948f452c8bb457ac6eb303a21,0,0,0,0,0,,California,38982847.0,76542.0,4084.0,72458.0,0.078869,0.053356
6,2020-06-18,CO,29673.0,232543.0,,270.0,5294.0,,,,,4253.0,A,6/17/2020 00:00,2020-06-17T00:00:00Z,06/16 20:00,1631.0,5294.0,2020-06-17 00:00:00,,,,27003.0,8,231,5318,262216,262216,5549,262216,14,22,188d26c26682079c89b109e45691f9056f041061,0,0,0,0,0,,Colorado,5436519.0,5549.0,231.0,5318.0,0.048232,0.041629
7,2020-06-18,CT,45440.0,327145.0,,176.0,10099.0,,,,,7842.0,B,6/17/2020 20:30,2020-06-17T20:30:00Z,06/17 16:30,4226.0,10099.0,2020-06-17 20:30:00,370638.0,,,43493.0,9,11,4716,372585,372585,4727,372585,7,187,77b849444aa079b6404afd063fc839e430ee1b2c,0,0,0,0,0,,Connecticut,3594478.0,4727.0,11.0,4716.0,0.103655,0.002327
8,2020-06-18,DC,9903.0,62296.0,,189.0,,86.0,,45.0,,1155.0,A+,6/17/2020 00:00,2020-06-17T00:00:00Z,06/16 20:00,527.0,,2020-06-17 00:00:00,,,,9903.0,11,56,2057,72199,72199,2113,72199,4,0,f8fd35364a4e8bdb08eff1522d6d257c483b11a7,0,0,0,0,0,,District of Columbia,672391.0,2113.0,56.0,2057.0,0.107377,0.026503
9,2020-06-18,DE,10499.0,78185.0,,79.0,,,,,,6350.0,A,6/17/2020 18:00,2020-06-17T18:00:00Z,06/17 14:00,431.0,,2020-06-17 18:00:00,,,,9524.0,10,55,2042,88684,88684,2097,88684,5,0,0cd98b77e825976795b4e290c1b0c72dda857a49,0,0,0,0,0,,Delaware,943732.0,2097.0,55.0,2042.0,0.093972,0.026228


In [50]:
# separate the most recent day into its own tab
#today = '2020-04-03'
most_recent_day = combined_1[combined_1['date'].isin([today])]
most_recent_day[most_recent_day.state_abbr.isin(['NY', 'NJ', 'DC'])]

Unnamed: 0,date,state_abbr,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade,state,population,tests_since_prev_day,positives_since_prev_day,negatives_since_prev_day,total_tests_per_person,positive_per_test
8,2020-06-18,DC,9903.0,62296.0,,189.0,,86.0,,45.0,,1155.0,A+,6/17/2020 00:00,2020-06-17T00:00:00Z,06/16 20:00,527.0,,2020-06-17 00:00:00,,,,9903.0,11,56,2057,72199,72199,2113,72199,4,0,f8fd35364a4e8bdb08eff1522d6d257c483b11a7,0,0,0,0,0,,District of Columbia,672391.0,2113.0,56.0,2057.0,0.107377,0.026503
34,2020-06-18,NJ,168107.0,1003627.0,,1258.0,19010.0,319.0,,257.0,,29101.0,A+,6/18/2020 13:00,2020-06-18T13:00:00Z,06/18 09:00,12800.0,19010.0,2020-06-18 13:00:00,,,,168107.0,34,404,23489,1171734,1171734,23893,1171734,31,0,6dcf04ad378ab1af177e7d138b834eee15057bdf,0,0,0,0,0,,New Jersey,8960161.0,23893.0,404.0,23489.0,0.130772,0.016909
37,2020-06-18,NY,385760.0,2793900.0,,1358.0,89995.0,388.0,,278.0,,69243.0,A,6/18/2020 00:00,2020-06-18T00:00:00Z,06/17 20:00,24661.0,89995.0,2020-06-18 00:00:00,,,,385760.0,36,618,67923,3179660,3179660,68541,3179660,32,0,310b7a8b0acb1d880a8f8dff6f7bb8a7c456036b,0,0,0,0,0,,New York,19798228.0,68541.0,618.0,67923.0,0.160603,0.009017


In [51]:
# to excel
with pd.ExcelWriter('../outputs/States_Daily_4PM.xlsx') as writer:
    combined_1.to_excel(writer, sheet_name = 'States Daily 4PM', index = False)
    most_recent_day.to_excel(writer, sheet_name = 'Most Recent Day', index = False)
    national_final.to_excel(writer, sheet_name = 'National', index = False)