In [2]:
"""
From Corona Data Scraper:
https://coronadatascraper.com/#home

This dataset is really comprehensive; please consider using it!

"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import git
# Locate the enclosing git repository by searching upward from the current
# directory, and keep its working directory as the base for data paths.
repo = git.Repo(path="./", search_parent_directories=True)
homedir = repo.working_dir

In [3]:
# Load the geolocation and population table from the local checkout,
# parsing the `date` column as datetimes.
# NOTE(review): the rendered frame shows an "Unnamed: 0" column, which looks
# like an index saved into the CSV; index_col=0 may be cleaner - confirm first.
df = pd.read_csv(
    f"{homedir}/data/international/covid/corona_data_scraper/geopop.csv",
    parse_dates=['date'],
)

# Alternative: fetch the time series directly from the website instead of
# relying on the daily-refreshed local copy.
# df = pd.read_csv('https://coronadatascraper.com/timeseries.csv', parse_dates=['date'])


In [4]:
# Preview the first ten rows of the loaded table.
df.head(n=10)

Unnamed: 0,city,county,state,country,population,lat,long,url,cases,deaths,recovered,active,tested,growthFactor,date
0,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,2.0,,,2.0,,,2020-01-22
1,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,3.0,,,3.0,,1.5,2020-01-23
2,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,5.0,,,5.0,,1.666667,2020-01-24
3,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,7.0,,,7.0,,1.4,2020-01-25
4,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,8.0,,2.0,6.0,,1.142857,2020-01-26
5,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,8.0,,2.0,6.0,,1.0,2020-01-27
6,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,14.0,,5.0,9.0,,1.75,2020-01-28
7,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,14.0,,5.0,9.0,,1.0,2020-01-29
8,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,14.0,,5.0,9.0,,1.0,2020-01-30
9,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,19.0,,5.0,14.0,,1.357143,2020-01-31


In [5]:
# Keep only the rows whose country code is "USA"; all other countries are dropped.
us_data = df.loc[lambda frame: frame["country"] == "USA"]

In [6]:
# Preview the first ten US rows.
us_data.head(n=10)

Unnamed: 0,city,county,state,country,population,lat,long,url,cases,deaths,recovered,active,tested,growthFactor,date
2945,,,WA,USA,7614893.0,47.279414,-120.806717,https://www.doh.wa.gov/Emergencies/Coronavirus,267.0,23.0,1.0,243.0,,,2020-03-10
2946,,,WA,USA,7614893.0,47.279414,-120.806717,https://www.doh.wa.gov/Emergencies/Coronavirus,366.0,29.0,1.0,336.0,,1.370787,2020-03-11
2947,,,WA,USA,7614893.0,47.279414,-120.806717,https://www.doh.wa.gov/Emergencies/Coronavirus,568.0,37.0,,,,,2020-03-13
2948,,,WA,USA,7614893.0,47.279414,-120.806717,https://www.doh.wa.gov/Emergencies/Coronavirus,642.0,40.0,,,,1.130282,2020-03-14
2949,,,WA,USA,7614893.0,47.279414,-120.806717,https://www.doh.wa.gov/Emergencies/Coronavirus,769.0,42.0,,,,1.197819,2020-03-15
2950,,,WA,USA,7614893.0,47.279414,-120.806717,https://www.doh.wa.gov/Emergencies/Coronavirus,904.0,48.0,,,,1.175553,2020-03-16
2951,,,WA,USA,7614893.0,47.279414,-120.806717,https://www.doh.wa.gov/Emergencies/Coronavirus,1012.0,52.0,,,,1.119469,2020-03-17
2952,,,WA,USA,7614893.0,47.279414,-120.806717,https://www.doh.wa.gov/Emergencies/Coronavirus,1187.0,66.0,,,,1.172925,2020-03-18
2953,,,WA,USA,7614893.0,47.279414,-120.806717,https://www.doh.wa.gov/Emergencies/Coronavirus,1375.0,73.0,,,,1.158382,2020-03-19
2954,,,WA,USA,7614893.0,47.279414,-120.806717,https://www.doh.wa.gov/Emergencies/Coronavirus,1522.0,82.0,,,,1.106909,2020-03-20


In [11]:
# Order the US rows newest-first. County-level rows and geolocations are
# only now starting to be generated, so they show up at the latest dates.
recent_us_data = us_data.sort_values("date", ascending=False)

In [12]:
# Preview the ten most recent US rows.
recent_us_data.head(n=10)

Unnamed: 0,city,county,state,country,population,lat,long,url,cases,deaths,recovered,active,tested,growthFactor,date
20962,,Pending County,TN,USA,,,,https://www.tn.gov/health/cedep/ncov.html,81.0,,,,,2.131579,2020-03-25
19619,,Delaware County,OK,USA,42733.0,36.41601,-94.787104,https://coronavirus.health.ok.gov/,1.0,,,,,,2020-03-25
20516,,Sullivan County,NH,USA,43144.0,43.364504,-72.193888,https://www.nh.gov/covid19/documents/case-map.pdf,1.0,,,,,1.0,2020-03-25
19719,,Calloway County,KY,USA,39135.0,36.623728,-88.260782,https://datawrapper.dwcdn.net/BbowM/23/,1.0,,,1.0,,1.0,2020-03-25
19714,,Breathitt County,KY,USA,12726.0,37.516053,-83.264173,https://datawrapper.dwcdn.net/BbowM/23/,2.0,,,2.0,,1.0,2020-03-25
20520,,Cheshire County,NH,USA,76493.0,42.947098,-72.243029,https://www.nh.gov/covid19/documents/case-map.pdf,1.0,,,,,1.0,2020-03-25
20508,,Belknap County,NH,USA,61022.0,43.524873,-71.445707,https://www.nh.gov/covid19/documents/case-map.pdf,8.0,,,,,1.142857,2020-03-25
3191,,,SC,USA,5148714.0,33.619274,-80.964323,https://opendata.arcgis.com/datasets/5ffb6e698...,342.0,7.0,,,,1.0,2020-03-25
19672,,Bingham County,ID,USA,46236.0,43.244898,-112.297777,https://coronavirus.idaho.gov,1.0,,,,,1.0,2020-03-25
20544,,Lea County,NM,USA,69611.0,32.785403,-103.433605,https://cv.nmhealth.org/cases-by-county/,1.0,,,,,1.0,2020-03-25


In [23]:
# Select California rows dated 3/25/2020 or later and list them
# alphabetically by county.
# As of 3/26/2020 the county-level data is still somewhat incomplete.
(
    recent_us_data
    .loc[lambda frame: (frame["state"] == "CA") & (frame["date"] >= '2020-3-25')]
    .sort_values(by=['county'])
)

Unnamed: 0,city,county,state,country,population,lat,long,url,cases,deaths,recovered,active,tested,growthFactor,date
7340,,Alameda County,CA,USA,1666753.0,37.680107,-121.902445,https://docs.google.com/spreadsheets/d/1CwZA4R...,135.0,2.0,,,,1.0,2020-03-25
18733,,Amador County,CA,USA,39383.0,38.462022,-120.54995,https://docs.google.com/spreadsheets/d/1CwZA4R...,2.0,,,,,1.0,2020-03-25
16801,,Butte County,CA,USA,231256.0,39.723763,-121.573063,https://docs.google.com/spreadsheets/d/1CwZA4R...,4.0,,,,,1.0,2020-03-25
8887,,Calaveras County,CA,USA,45602.0,38.170646,-120.507724,https://docs.google.com/spreadsheets/d/1CwZA4R...,2.0,,,,,1.0,2020-03-25
8125,,Contra Costa County,CA,USA,1150215.0,37.909253,-121.983341,https://docs.google.com/spreadsheets/d/1CwZA4R...,108.0,1.0,,,,1.255814,2020-03-25
18355,,Del Norte County,CA,USA,27828.0,41.690911,-123.88695,https://docs.google.com/spreadsheets/d/1CwZA4R...,,,,,38.0,,2020-03-25
18738,,El Dorado County,CA,USA,190678.0,38.785457,-120.509148,https://docs.google.com/spreadsheets/d/1CwZA4R...,3.0,,,,,1.0,2020-03-25
7068,,Fresno County,CA,USA,994400.0,36.746462,-119.639658,https://docs.google.com/spreadsheets/d/1CwZA4R...,18.0,,,,,1.0,2020-03-25
18358,,Glenn County,CA,USA,28047.0,39.591375,-122.405772,https://docs.google.com/spreadsheets/d/1CwZA4R...,,,,,4.0,,2020-03-25
8522,,Humboldt County,CA,USA,136373.0,40.733564,-123.907815,https://docs.google.com/spreadsheets/d/1CwZA4R...,5.0,,1.0,4.0,,1.25,2020-03-25
