# Initialize

In [1]:
# Python standard library
import imp  # deprecated since Python 3.4 (removed in 3.12); prefer importlib.reload
import importlib
import itertools
import json
import math

# Third-party modules
import matplotlib
import matplotlib.pyplot as plt
# import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
import plotly
# import requests
from cycler import cycler

# Custom Modules
import CensusData
import CovidData
import CovidFunctions
import CovidFunctionsPlotly

# imp.reload(CovidData)

# Covid Data    
https://covidtracking.com/api  
https://github.com/nytimes/covid-19-data  
https://www.apple.com/covid19/mobility  
https://www.google.com/covid19/mobility/  

## Cool Visualizations
https://www.nytimes.com/interactive/2020/05/05/us/coronavirus-death-toll-us.html  
https://www.nytimes.com/interactive/2020/us/states-reopen-map-coronavirus.html  
https://www.nytimes.com/interactive/2020/04/03/upshot/coronavirus-metro-area-tracker.html  
https://www.nytimes.com/interactive/2020/04/23/upshot/five-ways-to-monitor-coronavirus-outbreak-us.html  
https://91-divoc.com/pages/covid-visualization/  
https://www.covidexitstrategy.org/

## Possible new sources
Data "coming soon" https://tracktherecovery.org/  
https://www.opendatanetwork.com/  
https://www.cdc.gov/library/researchguides/2019NovelCoronavirus.html  
https://coronavirus.jhu.edu/  
https://www.who.int/emergencies/diseases/novel-coronavirus-2019/global-research-on-novel-coronavirus-2019-ncov  
https://www.who.int/emergencies/diseases/novel-coronavirus-2019/global-research-on-novel-coronavirus-2019-ncov  
https://www.tableau.com/covid-19-coronavirus-data-resources  
https://www.tableau.com/covid-19-coronavirus-data-resources/data-sources  
https://www.kff.org/health-costs/issue-brief/state-data-and-policy-actions-to-address-coronavirus/#socialdistancing  
https://data.humdata.org/dataset/acaps-covid19-government-measures-dataset  
Ideas and projects with data: https://github.com/CSSEGISandData/COVID-19/issues/2389


## Download and Import Data

In [2]:
# Census Data: one row per state as returned by CensusData.StateData()
dfStateData = CensusData.StateData()

# Add a USA row under key '00' whose population is the sum of the
# (numerically coerced) 'Population' column of all rows loaded above
usaPopulation = pd.to_numeric(dfStateData['Population']).sum()
dfStateData.loc['00'] = {'State': 'USA', 'Population': usaPopulation}

# Covid Data
dfCovid = CovidData.combine_data()

# State Level Events (the column at position 1 is parsed as dates)
dfEvents = pd.read_csv('Events.txt', parse_dates=[1])

# CDC Death Data
dfCDCdeaths = CovidData.cdc_death_data(dfStateData)

# Google and Apple mobility data
dfMobility = CovidData.mobility_data(dfStateData)

https://api.census.gov/data/2019/pep/population?get=POP&for=state:*
Population data API call result: 200 OK
https://api.census.gov/data/2010/dec/sf1?get=NAME&for=state:*
State name API call result: 200 OK
https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv
https://covidtracking.com/api/v1/states/daily.csv
Starting download CDC weekely death data...
Downloaded CDC weekly death data for every state from 2014-present.



Columns (4) have mixed types.Specify dtype option on import or set low_memory=False.



Imported Apple and Google Mobility Reports
USA
Collected Apple USA data
Collected Google USA data
Created combined mobility report


In [None]:
# Re-import CovidData so edits to the module are picked up without a
# kernel restart, then rebuild the mobility frame with the reloaded code.
# importlib.reload replaces the deprecated imp.reload.
importlib.reload(CovidData)
dfMobility = CovidData.mobility_data(dfStateData)

# Data Analysis

## Create USA Total Data

In [3]:
# Build national rows (state 'USA', FIPS '00') and add them to the
# CDC-death and Covid frames.
# NOTE: this cell extends dfCDCdeaths and dfCovid. Re-running it without
# first re-running the import cell would fold the already-added USA rows
# into the totals and add them a second time.

# Add total reported CDC deaths for USA for each week:
# sum 'allcause' over all rows sharing the same (week, year)
dfCDCusa = dfCDCdeaths.groupby(['week', 'year'])['allcause'].sum().reset_index()
dfCDCusa['state'] = 'USA'
dfCDCusa['FIPS'] = '00'
# dfCDCusa['weekendingdate'] = /add if needed later
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
dfCDCdeaths = pd.concat([dfCDCdeaths, dfCDCusa])

# For each day, sum columns of dfCovid.
# numeric_only=True skips text columns instead of trying to add them;
# min_count=1 leaves a total as NaN when every input for that day is NaN.
dfCovidUSA = dfCovid.groupby('date').sum(numeric_only=True, min_count=1).reset_index()
dfCovidUSA['fips'] = '00'
dfCovidUSA = dfCovidUSA.set_index(['fips', 'date'])

# Treat a zero NYT case total as missing. A single .loc assignment is used
# because chained indexing (df[col][mask] = ...) may write to a temporary copy.
dfCovidUSA.loc[dfCovidUSA['cases(NYT)'] == 0, 'cases(NYT)'] = np.nan

dfCovid = pd.concat([dfCovid, dfCovidUSA])

## Merge Data

In [4]:
# Combine the two sources: row-wise mean of the NYT and CTP columns
dfCovid['positive_cases'] = dfCovid[["cases(NYT)", "positive(CTP)"]].mean(axis=1)
dfCovid['deaths'] = dfCovid[["deaths(NYT)", "death(CTP)"]].mean(axis=1)

# Fractional week number for every row: ISO week number plus ISO weekday / 7,
# taken from the date level (level 1) of the index
isoDates = (stamp.isocalendar() for stamp in dfCovid.index.get_level_values(1))
dfCovid['week'] = [iso[1] + iso[2] / 7 for iso in isoDates]

print(dfCovid.columns.values)

['cases(NYT)' 'deaths(NYT)' 'positive(CTP)' 'negative(CTP)' 'pending(CTP)'
 'hospitalizedCurrently(CTP)' 'hospitalizedCumulative(CTP)'
 'inIcuCurrently(CTP)' 'inIcuCumulative(CTP)' 'onVentilatorCurrently(CTP)'
 'onVentilatorCumulative(CTP)' 'recovered(CTP)' 'dataQualityGrade(CTP)'
 'lastUpdateEt(CTP)' 'dateModified(CTP)' 'checkTimeEt(CTP)' 'death(CTP)'
 'hospitalized(CTP)' 'dateChecked(CTP)' 'totalTestsViral(CTP)'
 'positiveTestsViral(CTP)' 'negativeTestsViral(CTP)'
 'positiveCasesViral(CTP)' 'deathConfirmed(CTP)' 'deathProbable(CTP)'
 'positiveIncrease(CTP)' 'negativeIncrease(CTP)' 'totalTestResults(CTP)'
 'totalTestResultsIncrease(CTP)' 'deathIncrease(CTP)'
 'hospitalizedIncrease(CTP)' 'commercialScore(CTP)'
 'negativeRegularScore(CTP)' 'negativeScore(CTP)' 'positiveScore(CTP)'
 'score(CTP)' 'grade(CTP)' 'positive_cases' 'deaths' 'week']


In [5]:
# Unique FIPS codes from index level 0, sorted numerically (not lexically).
fipsCodes = dfCovid.index.get_level_values(0).unique().astype(int).sort_values()

# Back to strings. Only the national aggregate (code 0) is spelled '00';
# previously the first list element was overwritten unconditionally, which
# relabelled a real state when no USA row was present and raised IndexError
# on an empty frame.
fipsList = ['00' if code == 0 else str(code) for code in fipsCodes]

# Date span covered by the data (index level 1), used as the plot range.
dateLevel = dfCovid.index.get_level_values(1)
firstDate = dateLevel.min()
currentDate = dateLevel.max()
plotDateRange = [firstDate, currentDate]

## Deaths correlated to cases from previous days

In [6]:
# Size of the day-shift search passed to CovidFunctions.correlations.
# NOTE(review): presumably the maximum lag between cases and deaths that is
# tested - confirm against CovidFunctions.correlations.
shiftSearch = 30 # Number of days to correlate to

# Correlation results for the FIPS codes in fipsList; the structure of the
# returned object is defined in CovidFunctions and is not visible here.
dfShiftCor = CovidFunctions.correlations(shiftSearch, dfCovid, fipsList)


divide by zero encountered in log



Completed 30 days of case-death correlations and auto-correlations.


## State level analysis

In [12]:
# Reload the plotting module so edits to it take effect without a kernel
# restart (importlib.reload replaces the deprecated imp.reload).
importlib.reload(CovidFunctionsPlotly)

# FIPS codes that have a row in dfStateData, normalised to int so that
# differently formatted codes compare equal. Built once, outside the loop.
knownFips = {int(code) for code in dfStateData.index}

# Create plots for each state
for fips in fipsList:
    if int(fips) in knownFips:
        CovidFunctionsPlotly.state_plot(dfCovid, dfShiftCor, dfStateData, dfEvents,
                                        dfCDCdeaths, dfMobility, fips, plotDateRange)
    else:
        print('Did not create plot for FIPS = ' + fips)

# Update index.md
CovidFunctionsPlotly.githubIndex(dfStateData, fipsList)

print('Finished Creating all State plots')

Did not create plot for FIPS = 60
Did not create plot for FIPS = 66
Did not create plot for FIPS = 69
Did not create plot for FIPS = 78
Finished Creating all State plots


### Individual State

In [64]:
# Reload the plotting module (importlib.reload replaces the deprecated
# imp.reload), then draw the plot for a single FIPS code.
importlib.reload(CovidFunctionsPlotly)
fips = '00'  # key used for the USA aggregate rows

CovidFunctionsPlotly.state_plot(dfCovid, dfShiftCor, dfStateData, dfEvents,
                                dfCDCdeaths, dfMobility, fips, plotDateRange)

In [None]:
# Reload CovidFunctions (importlib.reload replaces the deprecated imp.reload),
# then draw its plot for a single FIPS code.
importlib.reload(CovidFunctions)

fips = '25'
# plt.plot(dfCovid.loc[fips]['positive_cases'])

CovidFunctions.state_plot(dfCovid, dfShiftCor, dfStateData, dfEvents, fips, plotDateRange)


In [None]:
# Reload the plotting module (importlib.reload replaces the deprecated
# imp.reload), then build and show the CDC deaths figure for one FIPS code.
importlib.reload(CovidFunctionsPlotly)
fips = '25'
fig = CovidFunctionsPlotly.cdc_deaths_plot(dfCDCdeaths, dfCovid, dfStateData, fips)
# fig = CovidFunctionsPlotly.event_markers(fig, dfEventsState)

fig.show()


In [28]:
# Update index.md
# Reload first so edits to CovidFunctionsPlotly are picked up
# (importlib.reload replaces the deprecated imp.reload).
importlib.reload(CovidFunctionsPlotly)
CovidFunctionsPlotly.githubIndex(dfStateData, fipsList)

In [None]:
# Reload CovidData (importlib.reload replaces the deprecated imp.reload)
# and fetch the CDC death data again with the reloaded code.
# NOTE: this replaces dfCDCdeaths, so any USA rows added to it by a later
# cell are gone until that cell is run again.
importlib.reload(CovidData)
dfCDCdeaths = CovidData.cdc_death_data(dfStateData)


In [41]:
# Flatten both levels of the dfCovid index (level 0, then level 1).
firstIndex, secondIndex = (dfCovid.index.get_level_values(level) for level in (0, 1))

# Latest date present in the data, and a boolean mask of the rows carrying it.
maxDate = secondIndex.max()
matchDate = secondIndex == maxDate

# Show every row dated maxDate.
dfCovid.loc[matchDate]

Unnamed: 0_level_0,Unnamed: 1_level_0,cases(NYT),checkTimeEt(CTP),commercialScore(CTP),dataQualityGrade(CTP),dateChecked(CTP),dateModified(CTP),death(CTP),deathConfirmed(CTP),deathIncrease(CTP),deathProbable(CTP),...,positiveScore(CTP),positiveTestsViral(CTP),recovered(CTP),score(CTP),totalTestResults(CTP),totalTestResultsIncrease(CTP),totalTestsViral(CTP),positive_cases,deaths,week
fips,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,2020-07-17,,07/17 07:00,0.0,B,2020-07-17T11:00:00Z,2020-07-17T11:00:00Z,1265.0,1232.0,35.0,33.0,...,0.0,,29736.0,0.0,563115.0,12006.0,562135.0,63091.0,1265.0,29.714286
10,2020-07-17,,07/16 14:00,0.0,A+,2020-07-16T18:00:00Z,2020-07-16T18:00:00Z,521.0,463.0,0.0,58.0,...,0.0,,7315.0,0.0,148610.0,3740.0,,13337.0,521.0,29.714286
11,2020-07-17,,07/15 20:00,0.0,A+,2020-07-16T00:00:00Z,2020-07-16T00:00:00Z,577.0,,3.0,,...,0.0,,1863.0,0.0,141607.0,1365.0,,11115.0,577.0,29.714286
12,2020-07-17,,07/16 19:59,0.0,A,2020-07-16T23:59:00Z,2020-07-16T23:59:00Z,4912.0,4912.0,130.0,,...,0.0,407133.0,,0.0,2880768.0,65150.0,3600738.0,327241.0,4912.0,29.714286
13,2020-07-17,,07/17 10:50,0.0,A,2020-07-17T14:50:00Z,2020-07-17T14:50:00Z,3132.0,,28.0,,...,0.0,123040.0,,0.0,1198678.0,20456.0,1198678.0,135183.0,3132.0,29.714286
15,2020-07-17,,07/16 14:00,0.0,D,2020-07-16T18:00:00Z,2020-07-16T18:00:00Z,22.0,,0.0,,...,0.0,,975.0,0.0,101204.0,1817.0,121512.0,1311.0,22.0,29.714286
16,2020-07-17,,07/16 15:00,0.0,A,2020-07-16T19:00:00Z,2020-07-16T19:00:00Z,114.0,93.0,4.0,21.0,...,0.0,,3676.0,0.0,140319.0,3456.0,139481.0,13133.0,114.0,29.714286
17,2020-07-17,,07/15 20:00,0.0,A,2020-07-16T00:00:00Z,2020-07-16T00:00:00Z,7465.0,7272.0,13.0,193.0,...,0.0,,,0.0,2167474.0,43735.0,2166299.0,160509.0,7465.0,29.714286
18,2020-07-17,,07/16 19:59,0.0,A+,2020-07-16T23:59:00Z,2020-07-16T23:59:00Z,2803.0,2610.0,8.0,193.0,...,0.0,,40421.0,0.0,604635.0,9077.0,,54813.0,2803.0,29.714286
19,2020-07-17,,07/16 20:00,0.0,A+,2020-07-17T00:00:00Z,2020-07-17T00:00:00Z,784.0,,6.0,,...,0.0,,27609.0,0.0,402453.0,6002.0,,37722.0,784.0,29.714286
