In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
pd.options.display.float_format = '{:.2f}'.format

In [2]:
# Read in New York Times online csv file with daily COVID data
# Convert FIPS number to integer, convert date to datetime, save file to local csv

us_covid_df = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv')
us_covid_df['fips'] = us_covid_df['fips'].fillna(0).astype(np.int64)
us_covid_df['date'] = pd.to_datetime(us_covid_df['date'], yearfirst=True)
us_covid_df.to_csv('us_covid.csv', index=False)

In [3]:
# Read in national population data provided by state, county, FIPS

us_pops_df = pd.read_csv('US_Populations.csv', engine='python', thousands=',')
us_pops_df

Unnamed: 0,FIPStxt,State,State2,Area_Name,Region,population
0,2000,Alaska,AK,Alaska,State,731545
1,2013,Alaska,AK,Aleutians East Borough,County,3337
2,2016,Alaska,AK,Aleutians West Census Area,County,5634
3,2020,Alaska,AK,Anchorage,County,288000
4,2050,Alaska,AK,Bethel Census Area,County,18386
...,...,...,...,...,...,...
3146,22119,Louisiana,LA,Webster,County,38340
3147,22121,Louisiana,LA,West Baton Rouge,County,26465
3148,22123,Louisiana,LA,West Carroll,County,10830
3149,22125,Louisiana,LA,West Feliciana,County,15568


In [4]:
# Merge New York Times data with population data

us_data_df = pd.merge(us_covid_df, us_pops_df, left_on=['county', 'state'], right_on=['Area_Name', 'State'])
us_data_df

Unnamed: 0,date,county,state,fips,cases,deaths,FIPStxt,State,State2,Area_Name,Region,population
0,2020-01-21,Snohomish,Washington,53061,1,0.00,53061,Washington,WA,Snohomish,County,822083
1,2020-01-22,Snohomish,Washington,53061,1,0.00,53061,Washington,WA,Snohomish,County,822083
2,2020-01-23,Snohomish,Washington,53061,1,0.00,53061,Washington,WA,Snohomish,County,822083
3,2020-01-24,Snohomish,Washington,53061,1,0.00,53061,Washington,WA,Snohomish,County,822083
4,2020-01-25,Snohomish,Washington,53061,1,0.00,53061,Washington,WA,Snohomish,County,822083
...,...,...,...,...,...,...,...,...,...,...,...,...
2169513,2022-02-27,Kalawao,Hawaii,15005,1,0.00,15005,Hawaii,HI,Kalawao,County,86
2169514,2022-02-28,Kalawao,Hawaii,15005,1,0.00,15005,Hawaii,HI,Kalawao,County,86
2169515,2022-03-01,Kalawao,Hawaii,15005,1,0.00,15005,Hawaii,HI,Kalawao,County,86
2169516,2022-03-02,Kalawao,Hawaii,15005,1,0.00,15005,Hawaii,HI,Kalawao,County,86


In [5]:
# Separate out data from the state level

states = us_data_df[(us_data_df['Region'] == 'State')]
states

Unnamed: 0,date,county,state,fips,cases,deaths,FIPStxt,State,State2,Area_Name,Region,population
55895,2020-03-07,District of Columbia,District of Columbia,11001,1,0.00,11000,District of Columbia,DC,District of Columbia,State,705749
55897,2020-03-08,District of Columbia,District of Columbia,11001,1,0.00,11000,District of Columbia,DC,District of Columbia,State,705749
55899,2020-03-09,District of Columbia,District of Columbia,11001,4,0.00,11000,District of Columbia,DC,District of Columbia,State,705749
55901,2020-03-10,District of Columbia,District of Columbia,11001,4,0.00,11000,District of Columbia,DC,District of Columbia,State,705749
55903,2020-03-11,District of Columbia,District of Columbia,11001,10,0.00,11000,District of Columbia,DC,District of Columbia,State,705749
...,...,...,...,...,...,...,...,...,...,...,...,...
1315168,2022-02-27,Arkansas,Arkansas,5001,5631,75.00,5000,Arkansas,AR,Arkansas,State,3017804
1315170,2022-02-28,Arkansas,Arkansas,5001,5633,75.00,5000,Arkansas,AR,Arkansas,State,3017804
1315172,2022-03-01,Arkansas,Arkansas,5001,5634,75.00,5000,Arkansas,AR,Arkansas,State,3017804
1315174,2022-03-02,Arkansas,Arkansas,5001,5637,75.00,5000,Arkansas,AR,Arkansas,State,3017804


In [6]:
# Separate out data specific to county level, drop redundant columns
#  Create infection and death rates, updating for each date in the dataframe

counties = us_data_df.drop(['fips', 'State','Area_Name'], axis=1)
counties['current_crate'] = counties['cases']*100000/counties['population']
counties['current_drate'] = counties['deaths']*100000/counties['population']
counties = counties[(counties['Region'] == 'County')]
counties

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate
0,2020-01-21,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00
1,2020-01-22,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00
2,2020-01-23,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00
3,2020-01-24,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00
4,2020-01-25,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00
...,...,...,...,...,...,...,...,...,...,...,...
2169513,2022-02-27,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00
2169514,2022-02-28,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00
2169515,2022-03-01,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00
2169516,2022-03-02,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00


In [7]:
# Isolate data from most current day.

counties_us = counties[(counties['date'] == '2022-03-03') & (counties['Region'] == 'County')]
counties_us

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate
772,2022-03-03,Snohomish,Washington,147025,1061.00,53061,WA,County,822083,17884.45,129.06
1542,2022-03-03,Cook,Illinois,1113031,14654.00,17031,IL,County,5150233,21611.27,284.53
2311,2022-03-03,Orange,California,582034,6643.00,6059,CA,County,3175692,18327.78,209.18
3079,2022-03-03,Maricopa,Arizona,1248818,15761.00,4013,AZ,County,4485414,27841.76,351.38
3847,2022-03-03,Los Angeles,California,2801445,30911.00,6037,CA,County,10039107,27905.32,307.91
...,...,...,...,...,...,...,...,...,...,...,...
2167613,2022-03-03,Wheeler,Oregon,243,3.00,41069,OR,County,1332,18243.24,225.23
2168120,2022-03-03,King,Texas,41,0.00,48269,TX,County,272,15073.53,0.00
2168596,2022-03-03,Esmeralda,Nevada,90,3.00,32009,NV,County,873,10309.28,343.64
2169068,2022-03-03,Loving,Texas,164,0.00,48301,TX,County,169,97041.42,0.00


In [8]:
# Create up to date rates so rankings can be performed using the latest data.

counties_us['drate'] = counties_us['deaths']/(counties_us['population']/100000)
counties_us['crate'] = counties_us['cases']/(counties_us['population']/100000)
counties_us = counties_us[(counties_us['Region'] == 'County')]
counties_us

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  counties_us['drate'] = counties_us['deaths']/(counties_us['population']/100000)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  counties_us['crate'] = counties_us['cases']/(counties_us['population']/100000)


Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate
772,2022-03-03,Snohomish,Washington,147025,1061.00,53061,WA,County,822083,17884.45,129.06,129.06,17884.45
1542,2022-03-03,Cook,Illinois,1113031,14654.00,17031,IL,County,5150233,21611.27,284.53,284.53,21611.27
2311,2022-03-03,Orange,California,582034,6643.00,6059,CA,County,3175692,18327.78,209.18,209.18,18327.78
3079,2022-03-03,Maricopa,Arizona,1248818,15761.00,4013,AZ,County,4485414,27841.76,351.38,351.38,27841.76
3847,2022-03-03,Los Angeles,California,2801445,30911.00,6037,CA,County,10039107,27905.32,307.91,307.91,27905.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2167613,2022-03-03,Wheeler,Oregon,243,3.00,41069,OR,County,1332,18243.24,225.23,225.23,18243.24
2168120,2022-03-03,King,Texas,41,0.00,48269,TX,County,272,15073.53,0.00,0.00,15073.53
2168596,2022-03-03,Esmeralda,Nevada,90,3.00,32009,NV,County,873,10309.28,343.64,343.64,10309.28
2169068,2022-03-03,Loving,Texas,164,0.00,48301,TX,County,169,97041.42,0.00,0.00,97041.42


In [9]:
# Create ranking of infection rates by county for each state

counties_us.sort_values(by=['state', 'crate', 'county'], inplace=True, ascending=(True, False, True))
counties_us['state_case']=counties_us.groupby([counties_us.State2]).cumcount()+1
counties_us

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_case
1160452,2022-03-03,Winston,Alabama,7542,120.00,1133,AL,County,23629,31918.41,507.85,507.85,31918.41,1
1375803,2022-03-03,Hale,Alabama,4669,101.00,1065,AL,County,14651,31868.13,689.37,689.37,31868.13,2
797286,2022-03-03,Franklin,Alabama,9818,137.00,1059,AL,County,31362,31305.40,436.83,436.83,31305.40,3
948594,2022-03-03,Clay,Alabama,4077,78.00,1027,AL,County,13235,30804.68,589.35,589.35,30804.68,4
1375099,2022-03-03,Fayette,Alabama,4920,93.00,1057,AL,County,16302,30180.35,570.48,570.48,30180.35,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1156917,2022-03-03,Johnson,Wyoming,1840,19.00,56019,WY,County,8445,21788.04,224.99,224.99,21788.04,19
1857516,2022-03-03,Big Horn,Wyoming,2565,60.00,56003,WY,County,11790,21755.73,508.91,508.91,21755.73,20
1635620,2022-03-03,Lincoln,Wyoming,3953,29.00,56023,WY,County,19830,19934.44,146.24,146.24,19934.44,21
1313061,2022-03-03,Sublette,Wyoming,1918,27.00,56035,WY,County,9831,19509.71,274.64,274.64,19509.71,22


In [10]:
# Create ranking of death rates by county for each state

counties_us.sort_values(by=['state', 'drate', 'county'], inplace=True, ascending=(True, False, True))
counties_us['state_death']=counties_us.groupby([counties_us.State2]).cumcount()+1
counties_us

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_case,state_death
1074081,2022-03-03,Lowndes,Alabama,2600,73.00,1085,AL,County,9726,26732.47,750.57,750.57,26732.47,30,1
1375803,2022-03-03,Hale,Alabama,4669,101.00,1065,AL,County,14651,31868.13,689.37,689.37,31868.13,2,2
1072665,2022-03-03,Crenshaw,Alabama,3775,93.00,1041,AL,County,13772,27410.69,675.28,675.28,27410.69,23,3
454053,2022-03-03,Walker,Alabama,18480,419.00,1127,AL,County,63521,29092.74,659.62,659.62,29092.74,10,4
950721,2022-03-03,Dallas,Alabama,8328,231.00,1047,AL,County,37196,22389.50,621.03,621.03,22389.50,60,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1156917,2022-03-03,Johnson,Wyoming,1840,19.00,56019,WY,County,8445,21788.04,224.99,224.99,21788.04,19,19
1594320,2022-03-03,Uinta,Wyoming,5624,37.00,56041,WY,County,20226,27805.79,182.93,182.93,27805.79,5,20
1635620,2022-03-03,Lincoln,Wyoming,3953,29.00,56023,WY,County,19830,19934.44,146.24,146.24,19934.44,21,21
1069833,2022-03-03,Albany,Wyoming,9825,47.00,56001,WY,County,38880,25270.06,120.88,120.88,25270.06,14,22


In [11]:
# Merge data from today with historical data to provide current rates + rates from previous points in time.

usa = pd.merge(counties, counties_us[['state', 'county','drate', 'crate', 'state_death', 'state_case']], on=['state', 'county'])
usa

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_death,state_case
0,2020-01-21,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00,129.06,17884.45,31,20
1,2020-01-22,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00,129.06,17884.45,31,20
2,2020-01-23,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00,129.06,17884.45,31,20
3,2020-01-24,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00,129.06,17884.45,31,20
4,2020-01-25,Snohomish,Washington,1,0.00,53061,WA,County,822083,0.12,0.00,129.06,17884.45,31,20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2164508,2022-02-27,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00,0.00,1162.79,5,5
2164509,2022-02-28,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00,0.00,1162.79,5,5
2164510,2022-03-01,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00,0.00,1162.79,5,5
2164511,2022-03-02,Kalawao,Hawaii,1,0.00,15005,HI,County,86,1162.79,0.00,0.00,1162.79,5,5


In [12]:
# Break out daily case data from the New York Times csv that is cumulative in nature

usa.sort_values(by=['state', 'state_case', 'date'], inplace=True)
usa['case_day'] = usa.groupby(['state','county']).cases.diff().fillna(usa.cases)
usa[ '14day_case_avg' ] = usa.case_day.rolling(14).mean()
usa

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_death,state_case,case_day,14day_case_avg
1156153,2020-03-27,Winston,Alabama,2,0.00,1133,AL,County,23629,8.46,0.00,507.85,31918.41,18,1,2.00,
1156154,2020-03-28,Winston,Alabama,2,0.00,1133,AL,County,23629,8.46,0.00,507.85,31918.41,18,1,0.00,
1156155,2020-03-29,Winston,Alabama,2,0.00,1133,AL,County,23629,8.46,0.00,507.85,31918.41,18,1,0.00,
1156156,2020-03-30,Winston,Alabama,2,0.00,1133,AL,County,23629,8.46,0.00,507.85,31918.41,18,1,0.00,
1156157,2020-03-31,Winston,Alabama,2,0.00,1133,AL,County,23629,8.46,0.00,507.85,31918.41,18,1,0.00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1663464,2022-02-27,Crook,Wyoming,1336,25.00,56011,WY,County,7584,17616.03,329.64,342.83,17695.15,10,23,0.00,0.86
1663465,2022-02-28,Crook,Wyoming,1339,25.00,56011,WY,County,7584,17655.59,329.64,342.83,17695.15,10,23,3.00,0.64
1663466,2022-03-01,Crook,Wyoming,1340,26.00,56011,WY,County,7584,17668.78,342.83,342.83,17695.15,10,23,1.00,0.64
1663467,2022-03-02,Crook,Wyoming,1341,26.00,56011,WY,County,7584,17681.96,342.83,342.83,17695.15,10,23,1.00,0.57


In [13]:
# Break out daily death data from the New York Times csv that is cumulative in nature

usa.sort_values(by=['state', 'state_death', 'date'], inplace=True)
usa['death_day'] = usa.groupby(['state','county']).deaths.diff().fillna(usa.deaths)
usa[ '14day_death_avg' ] = usa.death_day.rolling(14).mean()
usa

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_death,state_case,case_day,14day_case_avg,death_day,14day_death_avg
1069781,2020-03-26,Lowndes,Alabama,1,0.00,1085,AL,County,9726,10.28,0.00,750.57,26732.47,1,30,1.00,5.93,0.00,
1069782,2020-03-27,Lowndes,Alabama,1,0.00,1085,AL,County,9726,10.28,0.00,750.57,26732.47,1,30,0.00,4.57,0.00,
1069783,2020-03-28,Lowndes,Alabama,1,0.00,1085,AL,County,9726,10.28,0.00,750.57,26732.47,1,30,0.00,4.43,0.00,
1069784,2020-03-29,Lowndes,Alabama,1,0.00,1085,AL,County,9726,10.28,0.00,750.57,26732.47,1,30,0.00,4.14,0.00,
1069785,2020-03-30,Lowndes,Alabama,1,0.00,1085,AL,County,9726,10.28,0.00,750.57,26732.47,1,30,0.00,3.93,0.00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
449020,2022-02-27,Teton,Wyoming,9767,16.00,56039,WY,County,23464,41625.47,68.19,68.19,41787.42,23,1,0.00,13.50,0.00,0.07
449021,2022-02-28,Teton,Wyoming,9782,16.00,56039,WY,County,23464,41689.40,68.19,68.19,41787.42,23,1,15.00,12.79,0.00,0.07
449022,2022-03-01,Teton,Wyoming,9791,16.00,56039,WY,County,23464,41727.75,68.19,68.19,41787.42,23,1,9.00,12.14,0.00,0.07
449023,2022-03-02,Teton,Wyoming,9794,16.00,56039,WY,County,23464,41740.54,68.19,68.19,41787.42,23,1,3.00,10.86,0.00,0.07


In [14]:
# Export updated national data to csv for use in Tableau

usa.to_csv('daily_covid_us.csv', index=False)

In [15]:
# Filter California data to show only numbers for today

counties_cal = usa[(usa['date'] == '2022-03-03') & (usa['state'] == 'California')]
counties_cal

Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_death,state_case,case_day,14day_case_avg,death_day,14day_death_avg
542681,2022-03-03,Imperial,California,65566,877.0,6025,CA,County,181215,36181.33,483.96,483.96,36181.33,1,1,124.0,73.29,3.0,1.21
1313696,2022-03-03,Tuolumne,California,12963,179.0,6109,CA,County,54478,23794.93,328.57,328.57,23794.93,2,14,0.0,13.5,4.0,0.57
954218,2022-03-03,Inyo,California,4515,56.0,6027,CA,County,18039,25029.1,310.44,310.44,25029.1,3,10,13.0,3.79,0.0,0.29
82771,2022-03-03,Shasta,California,36378,559.0,6089,CA,County,180080,20201.02,310.42,310.42,20201.02,4,28,58.0,43.79,0.0,1.57
3847,2022-03-03,Los Angeles,California,2801445,30911.0,6037,CA,County,10039107,27905.32,307.91,307.91,27905.32,5,5,1572.0,2012.29,58.0,54.64
269009,2022-03-03,San Bernardino,California,581062,6619.0,6071,CA,County,2180085,26653.18,303.61,303.61,26653.18,6,7,598.0,377.0,31.0,17.14
119709,2022-03-03,Stanislaus,California,135804,1667.0,6099,CA,County,550660,24662.04,302.73,302.73,24662.04,7,13,173.0,169.93,7.0,4.14
864065,2022-03-03,Merced,California,70963,794.0,6047,CA,County,277680,25555.68,285.94,285.94,25555.68,8,9,84.0,75.07,4.0,1.43
1158980,2022-03-03,Kings,California,54397,431.0,6031,CA,County,152940,35567.54,281.81,281.81,35567.54,9,2,190.0,105.71,1.0,0.71
148626,2022-03-03,Tulare,California,131458,1302.0,6107,CA,County,466195,28198.07,279.28,279.28,28198.07,10,4,289.0,162.14,0.0,1.36


In [16]:
# Show top 10 California counties based on largest current infection rate

counties_cal.sort_values(by=['crate'], inplace=True, ascending=False)
top10_ca_case_rates = counties_cal.head(10)
top10_ca_case_rates

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  counties_cal.sort_values(by=['crate'], inplace=True, ascending=False)


Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_death,state_case,case_day,14day_case_avg,death_day,14day_death_avg
542681,2022-03-03,Imperial,California,65566,877.0,6025,CA,County,181215,36181.33,483.96,483.96,36181.33,1,1,124.0,73.29,3.0,1.21
1158980,2022-03-03,Kings,California,54397,431.0,6031,CA,County,152940,35567.54,281.81,281.81,35567.54,9,2,190.0,105.71,1.0,0.71
2041558,2022-03-03,Lassen,California,10666,67.0,6035,CA,County,30573,34886.99,219.15,219.15,34886.99,21,3,4.0,15.21,0.0,0.14
148626,2022-03-03,Tulare,California,131458,1302.0,6107,CA,County,466195,28198.07,279.28,279.28,28198.07,10,4,289.0,162.14,0.0,1.36
3847,2022-03-03,Los Angeles,California,2801445,30911.0,6037,CA,County,10039107,27905.32,307.91,307.91,27905.32,5,5,1572.0,2012.29,58.0,54.64
54440,2022-03-03,Madera,California,43052,354.0,6039,CA,County,157327,27364.66,225.01,225.01,27364.66,19,6,82.0,61.21,0.0,1.71
269009,2022-03-03,San Bernardino,California,581062,6619.0,6071,CA,County,2180085,26653.18,303.61,303.61,26653.18,6,7,598.0,377.0,31.0,17.14
338697,2022-03-03,Kern,California,238866,2136.0,6029,CA,County,900202,26534.71,237.28,237.28,26534.71,16,8,746.0,588.29,6.0,2.93
864065,2022-03-03,Merced,California,70963,794.0,6047,CA,County,277680,25555.68,285.94,285.94,25555.68,8,9,84.0,75.07,4.0,1.43
954218,2022-03-03,Inyo,California,4515,56.0,6027,CA,County,18039,25029.1,310.44,310.44,25029.1,3,10,13.0,3.79,0.0,0.29


In [17]:
# Show top 10 California counties based on largest current death rate

counties_cal.sort_values(by=['drate'], inplace=True, ascending=False)
top10_ca_death_rates = counties_cal.head(10)
top10_ca_death_rates

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  counties_cal.sort_values(by=['drate'], inplace=True, ascending=False)


Unnamed: 0,date,county,state,cases,deaths,FIPStxt,State2,Region,population,current_crate,current_drate,drate,crate,state_death,state_case,case_day,14day_case_avg,death_day,14day_death_avg
542681,2022-03-03,Imperial,California,65566,877.0,6025,CA,County,181215,36181.33,483.96,483.96,36181.33,1,1,124.0,73.29,3.0,1.21
1313696,2022-03-03,Tuolumne,California,12963,179.0,6109,CA,County,54478,23794.93,328.57,328.57,23794.93,2,14,0.0,13.5,4.0,0.57
954218,2022-03-03,Inyo,California,4515,56.0,6027,CA,County,18039,25029.1,310.44,310.44,25029.1,3,10,13.0,3.79,0.0,0.29
82771,2022-03-03,Shasta,California,36378,559.0,6089,CA,County,180080,20201.02,310.42,310.42,20201.02,4,28,58.0,43.79,0.0,1.57
3847,2022-03-03,Los Angeles,California,2801445,30911.0,6037,CA,County,10039107,27905.32,307.91,307.91,27905.32,5,5,1572.0,2012.29,58.0,54.64
269009,2022-03-03,San Bernardino,California,581062,6619.0,6071,CA,County,2180085,26653.18,303.61,303.61,26653.18,6,7,598.0,377.0,31.0,17.14
119709,2022-03-03,Stanislaus,California,135804,1667.0,6099,CA,County,550660,24662.04,302.73,302.73,24662.04,7,13,173.0,169.93,7.0,4.14
864065,2022-03-03,Merced,California,70963,794.0,6047,CA,County,277680,25555.68,285.94,285.94,25555.68,8,9,84.0,75.07,4.0,1.43
1158980,2022-03-03,Kings,California,54397,431.0,6031,CA,County,152940,35567.54,281.81,281.81,35567.54,9,2,190.0,105.71,1.0,0.71
148626,2022-03-03,Tulare,California,131458,1302.0,6107,CA,County,466195,28198.07,279.28,279.28,28198.07,10,4,289.0,162.14,0.0,1.36


In [18]:
# Create dataframe with historical data for Top 10 counties based on current case rate

top10c_df = top10_ca_case_rates[['county', 'state']]
top10_ca_case_df = pd.merge(usa, top10c_df, on=['county', 'state'])
top10_ca_case_df['date'] = pd.to_datetime(top10_ca_case_df['date']).dt.strftime('%m/%d/%y')
top10_ca_case_df = top10_ca_case_df[['date', 'county','cases', 'current_crate', 'state_case', 'case_day', '14day_case_avg']]
top10_ca_case_df

Unnamed: 0,date,county,cases,current_crate,state_case,case_day,14day_case_avg
0,03/20/20,Imperial,4,2.21,1,4.00,0.29
1,03/21/20,Imperial,4,2.21,1,0.00,0.29
2,03/22/20,Imperial,4,2.21,1,0.00,0.29
3,03/23/20,Imperial,4,2.21,1,0.00,0.29
4,03/24/20,Imperial,9,4.97,1,5.00,0.43
...,...,...,...,...,...,...,...
7138,02/27/22,Lassen,10637,34792.14,3,0.00,19.00
7139,02/28/22,Lassen,10637,34792.14,3,0.00,19.00
7140,03/01/22,Lassen,10648,34828.12,3,11.00,16.57
7141,03/02/22,Lassen,10662,34873.91,3,14.00,15.57


In [19]:
# Create dataframe with historical data for Top 10 counties based on current death rate

top10d_df = top10_ca_death_rates[['county', 'state']]
top10_ca_death_df = pd.merge(usa, top10d_df, on=['county', 'state'])
top10_ca_death_df['date'] = pd.to_datetime(top10_ca_death_df['date']).dt.strftime('%m/%d/%y')
top10_ca_death_df = top10_ca_death_df[['date', 'county','deaths', 'current_drate', 'state_death', 'death_day', '14day_death_avg']]
top10_ca_death_df

Unnamed: 0,date,county,deaths,current_drate,state_death,death_day,14day_death_avg
0,03/20/20,Imperial,0.00,0.00,1,0.00,0.00
1,03/21/20,Imperial,0.00,0.00,1,0.00,0.00
2,03/22/20,Imperial,0.00,0.00,1,0.00,0.00
3,03/23/20,Imperial,0.00,0.00,1,0.00,0.00
4,03/24/20,Imperial,0.00,0.00,1,0.00,0.00
...,...,...,...,...,...,...,...
7197,02/27/22,Tulare,1295.00,277.78,10,0.00,1.57
7198,02/28/22,Tulare,1300.00,278.85,10,5.00,1.71
7199,03/01/22,Tulare,1300.00,278.85,10,0.00,1.71
7200,03/02/22,Tulare,1302.00,279.28,10,2.00,1.64


In [20]:
# Export California Top 10 case data to csv

top10_ca_case_df.to_csv('top10cases.csv', float_format='%.2f', index=False)

In [21]:
# Export California Top 10 death data to csv

top10_ca_death_df.to_csv('top10deaths.csv', float_format='%.2f', index=False)