In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
pd.options.display.float_format = '{:.2f}'.format

In [2]:
# Read in New York Times online csv file with daily COVID data
# Convert FIPS number to integer, convert date to datetime, save file to local csv

us_covid_df = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv')
us_covid_df['fips'] = us_covid_df['fips'].fillna(0).astype(np.int64)
us_covid_df['date'] = pd.to_datetime(us_covid_df['date'], yearfirst=True)
us_covid_df.to_csv('us_covid.csv', index=False)

In [3]:
# Read in national population data provided by state, county, FIPS

us_pops_df = pd.read_csv('US_Populations.csv', engine='python', thousands=',')
us_pops_df

Unnamed: 0,FIPStxt,State,State2,Area_Name,Region,population
0,2000,Alaska,AK,Alaska,State,731545
1,2013,Alaska,AK,Aleutians East Borough,County,3337
2,2016,Alaska,AK,Aleutians West Census Area,County,5634
3,2020,Alaska,AK,Anchorage,County,288000
4,2050,Alaska,AK,Bethel Census Area,County,18386
...,...,...,...,...,...,...
3146,22119,Louisiana,LA,Webster,County,38340
3147,22121,Louisiana,LA,West Baton Rouge,County,26465
3148,22123,Louisiana,LA,West Carroll,County,10830
3149,22125,Louisiana,LA,West Feliciana,County,15568


In [4]:
# Merge New York Times data with population data

us_data_df = pd.merge(us_covid_df, us_pops_df, left_on=['county', 'state'], right_on=['Area_Name', 'State'])
us_data_df

Unnamed: 0,date,county,state,fips,cases,deaths,FIPStxt,State,State2,Area_Name,Region,population
0,2020-01-21,Snohomish,Washington,53061,1,0.00,53061,Washington,WA,Snohomish,County,822083
1,2020-01-22,Snohomish,Washington,53061,1,0.00,53061,Washington,WA,Snohomish,County,822083
2,2020-01-23,Snohomish,Washington,53061,1,0.00,53061,Washington,WA,Snohomish,County,822083
3,2020-01-24,Snohomish,Washington,53061,1,0.00,53061,Washington,WA,Snohomish,County,822083
4,2020-01-25,Snohomish,Washington,53061,1,0.00,53061,Washington,WA,Snohomish,County,822083
...,...,...,...,...,...,...,...,...,...,...,...,...
1822537,2021-11-07,Kalawao,Hawaii,15005,1,0.00,15005,Hawaii,HI,Kalawao,County,86
1822538,2021-11-08,Kalawao,Hawaii,15005,1,0.00,15005,Hawaii,HI,Kalawao,County,86
1822539,2021-11-09,Kalawao,Hawaii,15005,1,0.00,15005,Hawaii,HI,Kalawao,County,86
1822540,2021-11-10,Kalawao,Hawaii,15005,1,0.00,15005,Hawaii,HI,Kalawao,County,86


In [5]:
# Separate out data from the state level

states = us_data_df[(us_data_df['Region'] == 'State')]
states

Unnamed: 0,date,county,state,fips,cases,deaths,FIPStxt,State,State2,Area_Name,Region,population
47383,2020-03-07,District of Columbia,District of Columbia,11001,1,0.00,11000,District of Columbia,DC,District of Columbia,State,705749
47385,2020-03-08,District of Columbia,District of Columbia,11001,1,0.00,11000,District of Columbia,DC,District of Columbia,State,705749
47387,2020-03-09,District of Columbia,District of Columbia,11001,4,0.00,11000,District of Columbia,DC,District of Columbia,State,705749
47389,2020-03-10,District of Columbia,District of Columbia,11001,4,0.00,11000,District of Columbia,DC,District of Columbia,State,705749
47391,2020-03-11,District of Columbia,District of Columbia,11001,10,0.00,11000,District of Columbia,DC,District of Columbia,State,705749
...,...,...,...,...,...,...,...,...,...,...,...,...
1108976,2021-11-07,Arkansas,Arkansas,5001,3508,60.00,5000,Arkansas,AR,Arkansas,State,3017804
1108978,2021-11-08,Arkansas,Arkansas,5001,3508,60.00,5000,Arkansas,AR,Arkansas,State,3017804
1108980,2021-11-09,Arkansas,Arkansas,5001,3510,60.00,5000,Arkansas,AR,Arkansas,State,3017804
1108982,2021-11-10,Arkansas,Arkansas,5001,3513,60.00,5000,Arkansas,AR,Arkansas,State,3017804


In [6]:
# Separate out data specific to county level, drop redundant columns
#  Create infection and death rates, updating for each date in the dataframe

counties = us_data_df.drop(['fips', 'State','Area_Name'], axis=1)
counties['current_crate'] = counties['cases']*100000/counties['population']
counties['current_drate'] = counties['deaths']*100000/counties['population']
counties = counties[(counties['Region'] == 'County')]
counties

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate
0,2020-01-21,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00
1,2020-01-22,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00
2,2020-01-23,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00
3,2020-01-24,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00
4,2020-01-25,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00
...,...,...,...,...,...,...,...,...,...,...,...
1822537,2021-11-07,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00
1822538,2021-11-08,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00
1822539,2021-11-09,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00
1822540,2021-11-10,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00


In [7]:
# Isolate data from most current day.

counties_us = counties[(counties['date'] == '2021-11-11') & (counties['Region'] == 'County')]
counties_us

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate
660,2021-11-11,Snohomish,Washington,67991,771.00,53061,WA,County,822083,8270.58,93.79
1318,2021-11-11,Cook,Illinois,651472,11910.00,17031,IL,County,5150233,12649.37,231.25
1975,2021-11-11,Orange,California,329541,5634.00,6059,CA,County,3175692,10376.98,177.41
2631,2021-11-11,Maricopa,Arizona,760613,12419.00,4013,AZ,County,4485414,16957.48,276.88
3287,2021-11-11,Los Angeles,California,1508440,26814.00,6037,CA,County,10039107,15025.64,267.10
...,...,...,...,...,...,...,...,...,...,...,...
1821085,2021-11-11,Wheeler,Oregon,108,1.00,41069,OR,County,1332,8108.11,75.08
1821480,2021-11-11,King,Texas,20,0.00,48269,TX,County,272,7352.94,0.00
1821844,2021-11-11,Esmeralda,Nevada,62,2.00,32009,NV,County,873,7101.95,229.10
1822204,2021-11-11,Loving,Texas,8,0.00,48301,TX,County,169,4733.73,0.00


In [8]:
# Create up to date rates so rankings can be performed using the latest data.

counties_us['drate'] = counties_us['deaths']/(counties_us['population']/100000)
counties_us['crate'] = counties_us['cases']/(counties_us['population']/100000)
counties_us = counties_us[(counties_us['Region'] == 'County')]
counties_us

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  counties_us['drate'] = counties_us['deaths']/(counties_us['population']/100000)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  counties_us['crate'] = counties_us['cases']/(counties_us['population']/100000)


Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate
660,2021-11-11,Snohomish,Washington,67991,771.00,53061,WA,County,822083,8270.58,93.79,93.79,8270.58
1318,2021-11-11,Cook,Illinois,651472,11910.00,17031,IL,County,5150233,12649.37,231.25,231.25,12649.37
1975,2021-11-11,Orange,California,329541,5634.00,6059,CA,County,3175692,10376.98,177.41,177.41,10376.98
2631,2021-11-11,Maricopa,Arizona,760613,12419.00,4013,AZ,County,4485414,16957.48,276.88,276.88,16957.48
3287,2021-11-11,Los Angeles,California,1508440,26814.00,6037,CA,County,10039107,15025.64,267.10,267.10,15025.64
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1821085,2021-11-11,Wheeler,Oregon,108,1.00,41069,OR,County,1332,8108.11,75.08,75.08,8108.11
1821480,2021-11-11,King,Texas,20,0.00,48269,TX,County,272,7352.94,0.00,0.00,7352.94
1821844,2021-11-11,Esmeralda,Nevada,62,2.00,32009,NV,County,873,7101.95,229.10,229.10,7101.95
1822204,2021-11-11,Loving,Texas,8,0.00,48301,TX,County,169,4733.73,0.00,0.00,4733.73


In [9]:
# Create ranking of infection rates by county for each state

counties_us.sort_values(by=['state', 'crate', 'county'], inplace=True, ascending=(True, False, True))
counties_us['state_case']=counties_us.groupby([counties_us.State2]).cumcount()+1
counties_us

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_case
1159979,2021-11-11,Hale,Alabama,3149,88.00,1065,AL,County,14651,21493.41,600.64,600.64,21493.41,1
1263499,2021-11-11,Clarke,Alabama,4816,86.00,1025,AL,County,23622,20387.77,364.07,364.07,20387.77,2
978788,2021-11-11,Winston,Alabama,4758,105.00,1133,AL,County,23629,20136.27,444.37,444.37,20136.27,3
672966,2021-11-11,Franklin,Alabama,6192,107.00,1059,AL,County,31362,19743.64,341.18,341.18,19743.64,4
327436,2021-11-11,Calhoun,Alabama,22371,513.00,1015,AL,County,113605,19691.91,451.56,451.56,19691.91,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1106611,2021-11-11,Converse,Wyoming,2264,39.00,56009,WY,County,13822,16379.68,282.16,282.16,16379.68,19
975813,2021-11-11,Johnson,Wyoming,1259,16.00,56019,WY,County,8445,14908.23,189.46,189.46,14908.23,20
1378356,2021-11-11,Lincoln,Wyoming,2880,23.00,56023,WY,County,19830,14523.45,115.99,115.99,14523.45,21
1107205,2021-11-11,Sublette,Wyoming,1423,21.00,56035,WY,County,9831,14474.62,213.61,213.61,14474.62,22


In [10]:
# Create ranking of death rates by county for each state

counties_us.sort_values(by=['state', 'drate', 'county'], inplace=True, ascending=(True, False, True))
counties_us['state_death']=counties_us.groupby([counties_us.State2]).cumcount()+1
counties_us

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_case,state_death
906081,2021-11-11,Lowndes,Alabama,1778,67.00,1085,AL,County,9726,18280.90,688.88,688.88,18280.90,22,1
1159979,2021-11-11,Hale,Alabama,3149,88.00,1065,AL,County,14651,21493.41,600.64,600.64,21493.41,1,2
383605,2021-11-11,Walker,Alabama,11808,376.00,1127,AL,County,63521,18589.13,591.93,591.93,18589.13,17,3
802209,2021-11-11,Dallas,Alabama,5253,207.00,1047,AL,County,37196,14122.49,556.51,556.51,14122.49,62,4
904889,2021-11-11,Crenshaw,Alabama,2562,75.00,1041,AL,County,13772,18602.96,544.58,544.58,18602.96,16,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1730248,2021-11-11,Weston,Wyoming,1168,10.00,56045,WY,County,6927,16861.56,144.36,144.36,16861.56,16,19
1343664,2021-11-11,Uinta,Wyoming,3933,29.00,56041,WY,County,20226,19445.27,143.38,143.38,19445.27,7,20
1378356,2021-11-11,Lincoln,Wyoming,2880,23.00,56023,WY,County,19830,14523.45,115.99,115.99,14523.45,21,21
902505,2021-11-11,Albany,Wyoming,6503,36.00,56001,WY,County,38880,16725.82,92.59,92.59,16725.82,17,22


In [11]:
# Merge data from today with historical data to provide current rates + rates from previous points in time.

usa = pd.merge(counties, counties_us[['state', 'county','drate', 'crate', 'state_death', 'state_case']], on=['state', 'county'])
usa

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_death,state_case
0,2020-01-21,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00,93.79,8270.58,28,28
1,2020-01-22,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00,93.79,8270.58,28,28
2,2020-01-23,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00,93.79,8270.58,28,28
3,2020-01-24,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00,93.79,8270.58,28,28
4,2020-01-25,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00,93.79,8270.58,28,28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1818316,2021-11-07,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00,0.00,1162.79,5,5
1818317,2021-11-08,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00,0.00,1162.79,5,5
1818318,2021-11-09,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00,0.00,1162.79,5,5
1818319,2021-11-10,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00,0.00,1162.79,5,5


In [12]:
# Break out daily case data from the New York Times csv that is cumulative in nature

usa.sort_values(by=['state', 'state_case', 'date'], inplace=True)
usa['case_day'] = usa.groupby(['state','county']).cases.diff().fillna(usa.cases)
usa[ '14day_case_avg' ] = usa.case_day.rolling(14).mean()
usa

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_death,state_case,case_day,14day_case_avg
1155167,2020-03-30,Hale,Alabama,1,0.00,1065,AL,County,14651,6.83,0.00,600.64,21493.41,2,1,1.00,
1155168,2020-03-31,Hale,Alabama,1,0.00,1065,AL,County,14651,6.83,0.00,600.64,21493.41,2,1,0.00,
1155169,2020-04-01,Hale,Alabama,1,0.00,1065,AL,County,14651,6.83,0.00,600.64,21493.41,2,1,0.00,
1155170,2020-04-02,Hale,Alabama,1,0.00,1065,AL,County,14651,6.83,0.00,600.64,21493.41,2,1,0.00,
1155171,2020-04-03,Hale,Alabama,3,0.00,1065,AL,County,14651,20.48,0.00,600.64,21493.41,2,1,2.00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1401720,2021-11-07,Crook,Wyoming,926,18.00,56011,WY,County,7584,12209.92,237.34,263.71,12434.07,10,23,0.00,4.43
1401721,2021-11-08,Crook,Wyoming,936,18.00,56011,WY,County,7584,12341.77,237.34,263.71,12434.07,10,23,10.00,4.43
1401722,2021-11-09,Crook,Wyoming,942,20.00,56011,WY,County,7584,12420.89,263.71,263.71,12434.07,10,23,6.00,4.43
1401723,2021-11-10,Crook,Wyoming,943,20.00,56011,WY,County,7584,12434.07,263.71,263.71,12434.07,10,23,1.00,4.29


In [13]:
# Break out daily death data from the New York Times csv that is cumulative in nature

usa.sort_values(by=['state', 'state_death', 'date'], inplace=True)
usa['death_day'] = usa.groupby(['state','county']).deaths.diff().fillna(usa.deaths)
usa[ '14day_death_avg' ] = usa.death_day.rolling(14).mean()
usa

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_death,state_case,case_day,14day_case_avg,death_day,14day_death_avg
902453,2020-03-26,Lowndes,Alabama,1,0.00,1085,AL,County,9726,10.28,0.00,688.88,18280.90,1,22,1.00,2.21,0.00,
902454,2020-03-27,Lowndes,Alabama,1,0.00,1085,AL,County,9726,10.28,0.00,688.88,18280.90,1,22,0.00,1.93,0.00,
902455,2020-03-28,Lowndes,Alabama,1,0.00,1085,AL,County,9726,10.28,0.00,688.88,18280.90,1,22,0.00,1.79,0.00,
902456,2020-03-29,Lowndes,Alabama,1,0.00,1085,AL,County,9726,10.28,0.00,688.88,18280.90,1,22,0.00,1.79,0.00,
902457,2020-03-30,Lowndes,Alabama,1,0.00,1085,AL,County,9726,10.28,0.00,688.88,18280.90,1,22,0.00,1.64,0.00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
379356,2021-11-07,Teton,Wyoming,5243,14.00,56039,WY,County,23464,22344.87,59.67,59.67,22528.13,23,3,0.00,8.86,0.00,0.00
379357,2021-11-08,Teton,Wyoming,5269,14.00,56039,WY,County,23464,22455.68,59.67,59.67,22528.13,23,3,26.00,8.36,0.00,0.00
379358,2021-11-09,Teton,Wyoming,5281,14.00,56039,WY,County,23464,22506.82,59.67,59.67,22528.13,23,3,12.00,8.36,0.00,0.00
379359,2021-11-10,Teton,Wyoming,5286,14.00,56039,WY,County,23464,22528.13,59.67,59.67,22528.13,23,3,5.00,8.43,0.00,0.00


In [14]:
# Export updated national data to csv for use in Tableau

usa.to_csv('daily_covid_us.csv', index=False)

In [15]:
# Filter California data to show only numbers for today

counties_cal = usa[(usa['date'] == '2021-11-11') & (usa['state'] == 'California')]
counties_cal

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_death,state_case,case_day,14day_case_avg,death_day,14day_death_avg
458345,2021-11-11,Imperial,California,37252,765.0,6025,CA,County,181215,20556.8,422.15,422.15,20556.8,1,3,151.0,62.79,0.0,0.0
227457,2021-11-11,San Bernardino,California,367034,5849.0,6071,CA,County,2180085,16835.77,268.29,268.29,16835.77,2,6,695.0,444.07,10.0,5.14
3287,2021-11-11,Los Angeles,California,1508440,26814.0,6037,CA,County,10039107,15025.64,267.1,267.1,15025.64,3,14,1431.0,1311.14,18.0,15.0
101341,2021-11-11,Stanislaus,California,89331,1380.0,6099,CA,County,550660,16222.53,250.61,250.61,16222.53,4,7,0.0,150.86,0.0,2.14
1107728,2021-11-11,Tuolumne,California,7564,134.0,6109,CA,County,54478,13884.5,245.97,245.97,13884.5,5,20,0.0,21.86,0.0,0.93
80535,2021-11-11,San Joaquin,California,105503,1803.0,6077,CA,County,762148,13842.85,236.57,236.57,13842.85,6,21,170.0,137.36,5.0,3.21
805146,2021-11-11,Inyo,California,2465,42.0,6027,CA,County,18039,13664.84,232.83,232.83,13664.84,7,22,23.0,13.79,0.0,0.07
729217,2021-11-11,Merced,California,44412,644.0,6047,CA,County,277680,15993.95,231.92,231.92,15993.95,8,9,0.0,73.93,0.0,3.79
977540,2021-11-11,Kings,California,34439,345.0,6031,CA,County,152940,22517.98,225.58,225.58,22517.98,9,2,74.0,68.14,1.0,0.64
70115,2021-11-11,Shasta,California,25330,402.0,6089,CA,County,180080,14065.97,223.23,223.23,14065.97,10,18,47.0,62.29,0.0,2.71


In [16]:
# Show top 10 California counties based on largest current infection rate

counties_cal.sort_values(by=['crate'], inplace=True, ascending=False)
top10_ca_case_rates = counties_cal.head(10)
top10_ca_case_rates

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  counties_cal.sort_values(by=['crate'], inplace=True, ascending=False)


Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_death,state_case,case_day,14day_case_avg,death_day,14day_death_avg
1717990,2021-11-11,Lassen,California,7843,54.0,6035,CA,County,30573,25653.35,176.63,176.63,25653.35,20,1,22.0,13.79,1.0,0.21
977540,2021-11-11,Kings,California,34439,345.0,6031,CA,County,152940,22517.98,225.58,225.58,22517.98,9,2,74.0,68.14,1.0,0.64
458345,2021-11-11,Imperial,California,37252,765.0,6025,CA,County,181215,20556.8,422.15,422.15,20556.8,1,3,151.0,62.79,0.0,0.0
125778,2021-11-11,Tulare,California,84369,1028.0,6107,CA,County,466195,18097.36,220.51,220.51,18097.36,11,4,398.0,184.21,4.0,2.64
286281,2021-11-11,Kern,California,153048,1730.0,6029,CA,County,900202,17001.52,192.18,192.18,17001.52,14,5,0.0,324.36,0.0,4.5
227457,2021-11-11,San Bernardino,California,367034,5849.0,6071,CA,County,2180085,16835.77,268.29,268.29,16835.77,2,6,695.0,444.07,10.0,5.14
101341,2021-11-11,Stanislaus,California,89331,1380.0,6099,CA,County,550660,16222.53,250.61,250.61,16222.53,4,7,0.0,150.86,0.0,2.14
46152,2021-11-11,Madera,California,25400,291.0,6039,CA,County,157327,16144.72,184.97,184.97,16144.72,17,8,87.0,73.71,0.0,0.29
729217,2021-11-11,Merced,California,44412,644.0,6047,CA,County,277680,15993.95,231.92,231.92,15993.95,8,9,0.0,73.93,0.0,3.79
1044771,2021-11-11,Glenn,California,4513,46.0,6021,CA,County,28393,15894.76,162.01,162.01,15894.76,23,10,8.0,5.57,0.0,0.07


In [17]:
# Show top 10 California counties based on largest current death rate

counties_cal.sort_values(by=['drate'], inplace=True, ascending=False)
top10_ca_death_rates = counties_cal.head(10)
top10_ca_death_rates

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  counties_cal.sort_values(by=['drate'], inplace=True, ascending=False)


Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_death,state_case,case_day,14day_case_avg,death_day,14day_death_avg
458345,2021-11-11,Imperial,California,37252,765.0,6025,CA,County,181215,20556.8,422.15,422.15,20556.8,1,3,151.0,62.79,0.0,0.0
227457,2021-11-11,San Bernardino,California,367034,5849.0,6071,CA,County,2180085,16835.77,268.29,268.29,16835.77,2,6,695.0,444.07,10.0,5.14
3287,2021-11-11,Los Angeles,California,1508440,26814.0,6037,CA,County,10039107,15025.64,267.1,267.1,15025.64,3,14,1431.0,1311.14,18.0,15.0
101341,2021-11-11,Stanislaus,California,89331,1380.0,6099,CA,County,550660,16222.53,250.61,250.61,16222.53,4,7,0.0,150.86,0.0,2.14
1107728,2021-11-11,Tuolumne,California,7564,134.0,6109,CA,County,54478,13884.5,245.97,245.97,13884.5,5,20,0.0,21.86,0.0,0.93
80535,2021-11-11,San Joaquin,California,105503,1803.0,6077,CA,County,762148,13842.85,236.57,236.57,13842.85,6,21,170.0,137.36,5.0,3.21
805146,2021-11-11,Inyo,California,2465,42.0,6027,CA,County,18039,13664.84,232.83,232.83,13664.84,7,22,23.0,13.79,0.0,0.07
729217,2021-11-11,Merced,California,44412,644.0,6047,CA,County,277680,15993.95,231.92,231.92,15993.95,8,9,0.0,73.93,0.0,3.79
977540,2021-11-11,Kings,California,34439,345.0,6031,CA,County,152940,22517.98,225.58,225.58,22517.98,9,2,74.0,68.14,1.0,0.64
70115,2021-11-11,Shasta,California,25330,402.0,6089,CA,County,180080,14065.97,223.23,223.23,14065.97,10,18,47.0,62.29,0.0,2.71


In [18]:
# Create dataframe with historical data for Top 10 counties based on current case rate

top10c_df = top10_ca_case_rates[['county', 'state']]
top10_ca_case_df = pd.merge(usa, top10c_df, on=['county', 'state'])
top10_ca_case_df['date'] = pd.to_datetime(top10_ca_case_df['date']).dt.strftime('%m/%d/%y')
top10_ca_case_df = top10_ca_case_df[['date', 'county','cases', 'current_crate', 'state_case', 'case_day', '14day_case_avg']]
top10_ca_case_df

Unnamed: 0,date,county,cases,current_crate,state_case,case_day,14day_case_avg
0,03/20/20,Imperial,4,2.21,3,4.00,58.36
1,03/21/20,Imperial,4,2.21,3,0.00,58.36
2,03/22/20,Imperial,4,2.21,3,0.00,58.36
3,03/23/20,Imperial,4,2.21,3,0.00,41.50
4,03/24/20,Imperial,9,4.97,3,5.00,38.14
...,...,...,...,...,...,...,...
5970,11/07/21,Glenn,4485,15796.15,10,0.00,7.21
5971,11/08/21,Glenn,4499,15845.45,10,14.00,8.21
5972,11/09/21,Glenn,4502,15856.02,10,3.00,6.86
5973,11/10/21,Glenn,4505,15866.59,10,3.00,6.36


In [19]:
# Create dataframe with historical data for Top 10 counties based on current death rate

top10d_df = top10_ca_death_rates[['county', 'state']]
top10_ca_death_df = pd.merge(usa, top10d_df, on=['county', 'state'])
top10_ca_death_df['date'] = pd.to_datetime(top10_ca_death_df['date']).dt.strftime('%m/%d/%y')
top10_ca_death_df = top10_ca_death_df[['date', 'county','deaths', 'current_drate', 'state_death', 'death_day', '14day_death_avg']]
top10_ca_death_df

Unnamed: 0,date,county,deaths,current_drate,state_death,death_day,14day_death_avg
0,03/20/20,Imperial,0.00,0.00,1,0.00,0.00
1,03/21/20,Imperial,0.00,0.00,1,0.00,0.00
2,03/22/20,Imperial,0.00,0.00,1,0.00,0.00
3,03/23/20,Imperial,0.00,0.00,1,0.00,0.00
4,03/24/20,Imperial,0.00,0.00,1,0.00,0.00
...,...,...,...,...,...,...,...
6079,11/07/21,Shasta,384.00,213.24,10,0.00,2.29
6080,11/08/21,Shasta,391.00,217.13,10,7.00,2.64
6081,11/09/21,Shasta,395.00,219.35,10,4.00,2.93
6082,11/10/21,Shasta,402.00,223.23,10,7.00,2.71


In [20]:
# Export California Top 10 case data to csv

top10_ca_case_df.to_csv('top10cases.csv', float_format='%.2f', index=False)

In [21]:
# Export California Top 10 death data to csv

top10_ca_death_df.to_csv('top10deaths.csv', float_format='%.2f', index=False)