In [1]:
"""
From Corona Data Scraper:
https://coronadatascraper.com/#home

This dataset is really comprehensive, please consider using it!

Note that NaNs in the columns do not imply 0s; it just means that the data isn't
accurate yet (say, in the instance that a county isn't reporting).

"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import git
# Find the git repository containing the current directory (searching parent
# directories as needed) and keep its working-tree root for building paths.
repo = git.Repo(path="./", search_parent_directories=True)
homedir = repo.working_dir

In [2]:
# Geolocation and population data, read from the copy stored in the repository.
# The 'date' column is parsed into datetimes at load time.
df = pd.read_csv(
    f"{homedir}/data/international/covid/corona_data_scraper/geopop.csv",
    parse_dates=["date"],
)

# Alternative: fetch the data directly from the website instead of relying on
# the daily refresh of the local file.
# df = pd.read_csv('https://coronadatascraper.com/timeseries.csv', parse_dates=['date'])


In [3]:
# Preview the first 10 rows of the full dataset.
df.head(n=10)

Unnamed: 0,city,county,state,country_code,population,lat,long,url,cases,deaths,recovered,active,tested,growthFactor,date,country,location
0,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,2.0,,,2.0,,,2020-01-22,Thailand,Thailand
1,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,3.0,,,3.0,,1.5,2020-01-23,Thailand,Thailand
2,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,5.0,,,5.0,,1.666667,2020-01-24,Thailand,Thailand
3,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,7.0,,,7.0,,1.4,2020-01-25,Thailand,Thailand
4,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,8.0,,2.0,6.0,,1.142857,2020-01-26,Thailand,Thailand
5,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,8.0,,2.0,6.0,,1.0,2020-01-27,Thailand,Thailand
6,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,14.0,,5.0,9.0,,1.75,2020-01-28,Thailand,Thailand
7,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,14.0,,5.0,9.0,,1.0,2020-01-29,Thailand,Thailand
8,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,14.0,,5.0,9.0,,1.0,2020-01-30,Thailand,Thailand
9,,,,THA,68414135.0,13.040833,101.544556,https://github.com/CSSEGISandData/COVID-19,19.0,,5.0,14.0,,1.357143,2020-01-31,Thailand,Thailand


In [4]:
# Keep only the rows whose country code is USA; all other countries are dropped.
us_data = df.loc[df["country_code"] == "USA"]

In [9]:
# First 10 US rows dated 15 March 2020. The date is written as an explicit
# ISO-8601 timestamp rather than the month-first string "3-15-2020", so the
# comparison does not depend on how an ambiguous date string is parsed.
us_data[us_data["date"] == pd.Timestamp("2020-03-15")].head(10)

Unnamed: 0,city,county,state,country_code,population,lat,long,url,cases,deaths,recovered,active,tested,growthFactor,date,country,location
2949,,,WA,USA,7614893.0,47.279414,-120.806717,https://www.doh.wa.gov/Emergencies/Coronavirus,769.0,42.0,,,,1.197819,2020-03-15,USA,"WA, USA"
2964,,,NY,USA,19453561.0,42.760805,-75.840971,https://coronavirus.health.ny.gov/county-count...,729.0,,,,,1.189233,2020-03-15,USA,"NY, USA"
2980,,,CA,USA,39512223.0,37.265605,-119.267171,https://docs.google.com/spreadsheets/d/1CwZA4R...,426.0,6.0,6.0,414.0,,1.252941,2020-03-15,USA,"CA, USA"
2996,,,MA,USA,6892503.0,42.120381,-71.723059,https://www.mass.gov/doc/covid-19-cases-in-mas...,164.0,,,,,1.188406,2020-03-15,USA,"MA, USA"
3012,,,GA,USA,10617423.0,32.682499,-83.220683,https://dph.georgia.gov/covid-19-daily-status-...,99.0,1.0,,98.0,,1.5,2020-03-15,USA,"GA, USA"
3026,,,CO,USA,5695430.0,38.999821,-105.543097,https://opendata.arcgis.com/datasets/fbae53974...,131.0,,,,758.0,1.336735,2020-03-15,USA,"CO, USA"
3041,,,FL,USA,21477737.0,28.060754,-83.829651,https://opendata.arcgis.com/datasets/d9de96980...,115.0,4.0,,111.0,,1.513158,2020-03-15,USA,"FL, USA"
3057,,,NJ,USA,8882190.0,40.146016,-74.71937,https://opendata.arcgis.com/datasets/24f4fcf16...,98.0,,,,142.0,1.42029,2020-03-15,USA,"NJ, USA"
3072,,,OR,USA,4217737.0,44.117405,-120.505338,https://www.oregon.gov/oha/PH/DISEASESCONDITIO...,39.0,,,,,1.083333,2020-03-15,USA,"OR, USA"
3087,,,TX,USA,28995881.0,31.177528,-100.088471,https://opendata.arcgis.com/datasets/bc8305838...,56.0,,,,,1.098039,2020-03-15,USA,"TX, USA"


In [6]:
# Order the US rows from newest to oldest. County-level data and geolocations
# are now starting to be generated, so they show up in the latest dates.
recent_us_data = us_data.sort_values("date", ascending=False)

In [7]:
# Preview the first 10 rows of the newest-first US data.
recent_us_data.head(n=10)

Unnamed: 0,city,county,state,country_code,population,lat,long,url,cases,deaths,recovered,active,tested,growthFactor,date,country,location
20962,,Pending County,TN,USA,,,,https://www.tn.gov/health/cedep/ncov.html,81.0,,,,,2.131579,2020-03-25,USA,"Pending County, TN, USA"
7475,,Davis County,UT,USA,351713.0,40.963096,-112.115984,https://coronavirus-dashboard.utah.gov/,35.0,,,,,1.129032,2020-03-25,USA,"Davis County, UT, USA"
15577,,Bristol County,MA,USA,564022.0,41.796574,-71.101309,https://www.mass.gov/doc/covid-19-cases-in-mas...,67.0,,,,,2.16129,2020-03-25,USA,"Bristol County, MA, USA"
15584,,Franklin County,MA,USA,70963.0,42.522516,-72.624274,https://www.mass.gov/doc/covid-19-cases-in-mas...,14.0,,,,,2.8,2020-03-25,USA,"Franklin County, MA, USA"
15594,,Hampden County,MA,USA,470406.0,42.170732,-72.60493,https://www.mass.gov/doc/covid-19-cases-in-mas...,45.0,,,,,1.875,2020-03-25,USA,"Hampden County, MA, USA"
15600,,Hampshire County,MA,USA,161355.0,42.369662,-72.63599,https://www.mass.gov/doc/covid-19-cases-in-mas...,11.0,,,,,1.375,2020-03-25,USA,"Hampshire County, MA, USA"
7405,,Rockland County,NY,USA,325695.0,41.160945,-74.060826,https://coronavirus.health.ny.gov/county-count...,968.0,,,,,1.442623,2020-03-25,USA,"Rockland County, NY, USA"
15610,,Plymouth County,MA,USA,518132.0,41.966696,-70.802879,https://www.mass.gov/doc/covid-19-cases-in-mas...,101.0,,,,,1.578125,2020-03-25,USA,"Plymouth County, MA, USA"
15618,,Guilford County,NC,USA,533670.0,36.078567,-79.789537,https://opendata.arcgis.com/datasets/969678bce...,22.0,,,,,1.375,2020-03-25,USA,"Guilford County, NC, USA"
15623,,Alamance County,NC,USA,166436.0,36.046625,-79.389903,https://opendata.arcgis.com/datasets/969678bce...,2.0,,,,,0.666667,2020-03-25,USA,"Alamance County, NC, USA"


In [8]:
# California rows dated 3/25/2020 or later, ordered by county name.
# As of 3/26/2020 the county-level data is still somewhat incomplete.
recent_us_data.loc[
    (recent_us_data["date"] >= '2020-3-25') & (recent_us_data["state"] == "CA")
].sort_values("county")

Unnamed: 0,city,county,state,country_code,population,lat,long,url,cases,deaths,recovered,active,tested,growthFactor,date,country,location
7340,,Alameda County,CA,USA,1666753.0,37.680107,-121.902445,https://docs.google.com/spreadsheets/d/1CwZA4R...,135.0,2.0,,,,1.0,2020-03-25,USA,"Alameda County, CA, USA"
18733,,Amador County,CA,USA,39383.0,38.462022,-120.549949,https://docs.google.com/spreadsheets/d/1CwZA4R...,2.0,,,,,1.0,2020-03-25,USA,"Amador County, CA, USA"
16801,,Butte County,CA,USA,231256.0,39.723763,-121.573063,https://docs.google.com/spreadsheets/d/1CwZA4R...,4.0,,,,,1.0,2020-03-25,USA,"Butte County, CA, USA"
8887,,Calaveras County,CA,USA,45602.0,38.170646,-120.507724,https://docs.google.com/spreadsheets/d/1CwZA4R...,2.0,,,,,1.0,2020-03-25,USA,"Calaveras County, CA, USA"
8125,,Contra Costa County,CA,USA,1150215.0,37.909253,-121.983341,https://docs.google.com/spreadsheets/d/1CwZA4R...,108.0,1.0,,,,1.255814,2020-03-25,USA,"Contra Costa County, CA, USA"
18355,,Del Norte County,CA,USA,27828.0,41.690911,-123.88695,https://docs.google.com/spreadsheets/d/1CwZA4R...,,,,,38.0,,2020-03-25,USA,"Del Norte County, CA, USA"
18738,,El Dorado County,CA,USA,190678.0,38.785457,-120.509148,https://docs.google.com/spreadsheets/d/1CwZA4R...,3.0,,,,,1.0,2020-03-25,USA,"El Dorado County, CA, USA"
7068,,Fresno County,CA,USA,994400.0,36.746462,-119.639658,https://docs.google.com/spreadsheets/d/1CwZA4R...,18.0,,,,,1.0,2020-03-25,USA,"Fresno County, CA, USA"
18358,,Glenn County,CA,USA,28047.0,39.591375,-122.405772,https://docs.google.com/spreadsheets/d/1CwZA4R...,,,,,4.0,,2020-03-25,USA,"Glenn County, CA, USA"
8522,,Humboldt County,CA,USA,136373.0,40.733564,-123.907815,https://docs.google.com/spreadsheets/d/1CwZA4R...,5.0,,1.0,4.0,,1.25,2020-03-25,USA,"Humboldt County, CA, USA"
