In [35]:
import pandas as pd

from datetime import datetime, timedelta
import json
geo_json_path = 'data/geo_data/geojson-counties-fips.json'
with open(geo_json_path,'r') as response:
    counties = json.load(response)

In [33]:
counties_url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
states_url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv'
county_data = pd.read_csv(counties_url,dtype={"fips": str})
state_data = pd.read_csv(states_url,dtype={"fips": str})
print("Received latest NYT Data")

Received latest NYT Data


In [3]:
len(county)

350365

In [4]:
len(county.dropna())

346810

In [5]:
county[county.fips.isna()].county.unique()

array(['New York City', 'Unknown', 'Kansas City', 'Joplin'], dtype=object)

In [13]:
county[county.fips.isna()][county.county == "Unknown"]

  """Entry point for launching an IPython kernel.


Unnamed: 0,date,county,state,fips,cases,deaths
418,2020-03-01,Unknown,Rhode Island,,2,0
450,2020-03-02,Unknown,Rhode Island,,2,0
485,2020-03-03,Unknown,Rhode Island,,2,0
522,2020-03-04,Unknown,Rhode Island,,2,0
569,2020-03-05,Unknown,Rhode Island,,2,0
636,2020-03-06,Unknown,Rhode Island,,3,0
728,2020-03-07,Unknown,Rhode Island,,3,0
837,2020-03-08,Unknown,Rhode Island,,3,0
966,2020-03-09,Unknown,Rhode Island,,3,0
1119,2020-03-10,Unknown,Rhode Island,,5,0


In [24]:
fips = pd.DataFrame(counties['features'])

In [28]:
len(fips.id)

3221

In [30]:
county.fips.nunique()

3174

In [31]:
3221-3174

47

In [36]:
# CALCULATED VALUES

latestDate = county_data.date.max()
latest_dt = datetime.strptime(latestDate, '%Y-%m-%d')
days_since_outbreak = (latest_dt - datetime.strptime('2019-12-31', '%Y-%m-%d')).days
days_since_USA = (latest_dt - datetime.strptime('2020-01-21', '%Y-%m-%d')).days
day_prior = datetime.strftime((latest_dt - timedelta(days=1)),'%Y-%m-%d')
two_prior = datetime.strftime((latest_dt - timedelta(days=2)),'%Y-%m-%d')

confirmed_cases = county_data.query("date=={}".format("'" + latestDate + "'")).cases.sum()
cases_day_prior = county_data.query("date=={}".format("'" + day_prior + "'")).cases.sum()
cases_two_prior = county_data.query("date=={}".format("'" + two_prior + "'")).cases.sum()

case_rate = (confirmed_cases - cases_day_prior) / cases_day_prior
case_prior_rate = (cases_day_prior - cases_two_prior) / cases_two_prior
case_rate_delta = (case_rate - case_prior_rate) / case_prior_rate
if case_rate_delta < 0:
    case_rate_delta_s = '{:.2%}'.format(case_rate_delta)
else:
    case_rate_delta_s = '+ {:.2%}'.format(case_rate_delta)
    
case_delta = confirmed_cases - cases_day_prior
case_percent_diff = (case_delta) / cases_day_prior


confirmed_deaths = county_data.query("date=={}".format("'" + latestDate + "'")).deaths.sum()
death_day_prior = county_data.query("date=={}".format("'" + day_prior + "'")).deaths.sum()
death_two_prior = county_data.query("date=={}".format("'" + two_prior + "'")).deaths.sum()

death_rate = (confirmed_deaths - death_day_prior) / death_day_prior
death_prior_rate = (death_day_prior - death_two_prior) / death_two_prior
death_rate_delta = (death_rate - death_prior_rate) / death_prior_rate
if death_rate_delta < 0:
    death_rate_delta_s = '{:.2%}'.format(death_rate_delta)
else:
    death_rate_delta_s = '+ {:.2%}'.format(death_rate_delta)
    

death_delta = confirmed_deaths - death_day_prior
death_percent_diff = (death_delta) / death_day_prior

latest = county_data.query("date=={}".format("'" + latestDate + "'"))


total = state_data.groupby(state_data.date).sum()
difference = total.diff()
difference = difference.fillna(0)
difference.columns = ['case_increase','death_increase']
new = total.merge(difference, left_index=True, right_index=True)


In [39]:
latest.fips.nunique()

3173

In [41]:
latest.head()

Unnamed: 0,date,county,state,fips,cases,deaths
347163,2020-07-19,Autauga,Alabama,1001,842,21
347164,2020-07-19,Baldwin,Alabama,1003,1937,15
347165,2020-07-19,Barbour,Alabama,1005,495,3
347166,2020-07-19,Bibb,Alabama,1007,269,2
347167,2020-07-19,Blount,Alabama,1009,482,1


In [42]:


census = pd.read_csv('data/co-est2019-alldata.csv',encoding = 'latin-1',dtype={"STATE": str,"COUNTY":str})
census = census.query('COUNTY!="000"')
census["fips"] = census.STATE + census.COUNTY
pop = census[['POPESTIMATE2019']].copy()
pop.columns = ['Population']
pop.index = census.fips
county_pop = pop.to_dict()

In [45]:
len(county_pop['Population'])

3142

In [58]:
census_fips = pop.index.to_list()

In [59]:
latest_fips = latest.fips.to_list()

In [60]:
not_in = []
for each in latest_fips:
    if each not in census_fips:
        not_in.append(each)

In [61]:
not_in

[nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 '69110',
 '69120',
 '72001',
 '72003',
 '72005',
 '72007',
 '72009',
 '72011',
 '72013',
 '72015',
 '72017',
 '72019',
 '72021',
 '72023',
 '72025',
 '72027',
 '72029',
 '72031',
 '72033',
 '72035',
 '72037',
 '72039',
 '72041',
 '72043',
 '72045',
 '72047',
 '72049',
 '72051',
 '72053',
 '72054',
 '72055',
 '72057',
 '72059',
 '72061',
 '72063',
 '72065',
 '72067',
 '72069',
 '72071',
 '72073',
 '72075',
 '72077',
 '72079',
 '72081',
 '72083',
 '72085',
 '72087',
 '72089',
 '72091',
 '72093',
 '72095',
 '72097',
 '72099',
 '72101',
 '72103',
 '72105',
 '72107',
 '72109',
 '72111',
 '72113',
 '72115',
 '72117',
 '72119',
 '72121',
 '72123',
 '72125',
 '72127',
 '72129',
 '72131',
 '72133',
 '72135',
 '72137',
 '72139',
 nan,
 '72141',
 '72143',
 '72145',
 '72147',
 '72149',
 '72151',
 '72153',
 nan,
 nan,
 nan,
 nan,
 '78010',
 '78020',
 '78030',
 nan,
 nan,
 n

In [66]:

county_data = pop.merge(county_data, how='inner',on='fips')

county_data['percent_pop'] = round(10*county_data.cases/county_data.Population_x,2)
latest2 = county_data.query("date=={}".format("'" + latestDate + "'"))
top = latest2.percent_pop.max()

In [69]:
latest2

3090

In [73]:
latest2

Unnamed: 0,fips,Population,Population_x,date,county,state,cases,deaths,Population_y,percent_pop
117,01001,55869,55869,2020-07-19,Autauga,Alabama,842,21,55869,0.15
245,01003,223234,223234,2020-07-19,Baldwin,Alabama,1937,15,223234,0.09
353,01005,24686,24686,2020-07-19,Barbour,Alabama,495,3,24686,0.20
465,01007,22394,22394,2020-07-19,Bibb,Alabama,269,2,22394,0.12
582,01009,57826,57826,2020-07-19,Blount,Alabama,482,1,57826,0.08
698,01011,10101,10101,2020-07-19,Bullock,Alabama,398,11,10101,0.39
815,01013,19448,19448,2020-07-19,Butler,Alabama,686,33,19448,0.35
939,01015,113605,113605,2020-07-19,Calhoun,Alabama,785,6,113605,0.07
1062,01017,33254,33254,2020-07-19,Chambers,Alabama,762,33,33254,0.23
1179,01019,26196,26196,2020-07-19,Cherokee,Alabama,173,7,26196,0.07
