In [1]:
import pandas as pd
import numpy as np
import warnings

warnings.simplefilter(action='ignore',category=FutureWarning)

# Load the daily county-level case counts and keep only the columns used by
# the later cells (date, county, state, cases).
# `columns=` replaces the positional axis argument (`drop([...], 1)`), which
# pandas deprecated and later removed.
covid_df = (
    pd.read_csv("us_counties_covid19_daily.csv")
    .drop(columns=['fips', 'deaths'])
)
covid_df.head()

Unnamed: 0,date,county,state,cases
0,2020-01-21,Snohomish,Washington,1
1,2020-01-22,Snohomish,Washington,1
2,2020-01-23,Snohomish,Washington,1
3,2020-01-24,Cook,Illinois,1
4,2020-01-24,Snohomish,Washington,1


In [2]:
# (state, county) pairs for the counties treated as "dense".
# Names must match the 'state' and 'county' columns of covid_df exactly,
# because the processing cell filters on equality. List order determines the
# row order of the combined output.
dense_state_county = [
    ('Maryland', 'Baltimore'),
    ('New Jersey', 'Essex'),
    ('Illinois', 'Cook'),
    ('New Jersey', 'Union'),
    ('Virginia', 'Norfolk city'),
    ('New York', 'Nassau'),
    ('Virginia', 'Harrisonburg city'),
]

In [3]:
# Process data for each dense county

# Collect one frame per county and concatenate once at the end, rather than
# re-concatenating the growing result on every iteration.
dense_county_frames = []

for state, county in dense_state_county:
    # Rows for this (state, county) pair, renumbered from 0
    is_county = (covid_df['state'] == state) & (covid_df['county'] == county)
    county_df = covid_df[is_county].reset_index(drop=True)

    # Change in cumulative cases relative to 1, 7 and 14 rows earlier.
    # The first 1 / 7 / 14 rows have no earlier row to compare with and stay NaN.
    # NOTE(review): this treats "N rows earlier" as "N days earlier", i.e. it
    # assumes one row per day in date order with no gaps - TODO confirm.
    county_df['new_cases'] = county_df['cases'].diff(1)
    county_df['delta7'] = county_df['cases'].diff(7)
    county_df['delta14'] = county_df['cases'].diff(14)

    # remove secondary words like 'city' from county names
    # (keeps only the first space-separated word of the name)
    county_df['county'] = county.split(' ')[0]

    dense_county_frames.append(county_df)

dense_county_covid_df = (
    # ignore_index gives the combined frame a fresh 0..n-1 index
    pd.concat(dense_county_frames, ignore_index=True)
    # the delta columns are float because of the NaNs; convert_dtypes turns
    # them into nullable integers
    .convert_dtypes()
    # make all county and state names lowercase
    .assign(
        county=lambda df: df['county'].str.lower(),
        state=lambda df: df['state'].str.lower(),
    )
    # rename 'cases' to 'total_cases'
    .rename(columns={'cases': 'total_cases'})
)

# save county results to csv (the row index is written as the first column,
# which is the to_csv default)
dense_county_covid_df.to_csv("dense_county_covid.csv")

# check results
dense_county_covid_df.head(30)

Unnamed: 0,date,county,state,total_cases,new_cases,delta7,delta14
0,2020-03-11,baltimore,maryland,1,,,
1,2020-03-12,baltimore,maryland,1,0.0,,
2,2020-03-13,baltimore,maryland,2,1.0,,
3,2020-03-14,baltimore,maryland,3,1.0,,
4,2020-03-15,baltimore,maryland,3,0.0,,
5,2020-03-16,baltimore,maryland,4,1.0,,
6,2020-03-17,baltimore,maryland,4,0.0,,
7,2020-03-18,baltimore,maryland,7,3.0,6.0,
8,2020-03-19,baltimore,maryland,13,6.0,12.0,
9,2020-03-20,baltimore,maryland,13,0.0,11.0,


In [4]:
# (state, county) pairs for the counties treated as "sparse".
# Names must match the 'state' and 'county' columns of covid_df exactly,
# because the processing cell filters on equality. List order determines the
# row order of the combined output.
sparse_state_county = [
    ('Virginia', 'Fairfax'),
    ('New Jersey', 'Camden'),
    ('Texas', 'Harris'),
    ('Ohio', 'Franklin'),
    ('Indiana', 'Marion'),
    ('Georgia', 'DeKalb'),
    ('Florida', 'Duval'),
    ('North Carolina', 'Wake'),
    ('Texas', 'Bexar'),
]

In [5]:
# Process data for each sparse county

# Collect one frame per county and concatenate once at the end, rather than
# re-concatenating the growing result on every iteration.
sparse_county_frames = []

for state, county in sparse_state_county:
    # Rows for this (state, county) pair, renumbered from 0
    is_county = (covid_df['state'] == state) & (covid_df['county'] == county)
    county_df = covid_df[is_county].reset_index(drop=True)

    # Change in cumulative cases relative to 1, 7 and 14 rows earlier.
    # The first 1 / 7 / 14 rows have no earlier row to compare with and stay NaN.
    # NOTE(review): this treats "N rows earlier" as "N days earlier", i.e. it
    # assumes one row per day in date order with no gaps - TODO confirm.
    county_df['new_cases'] = county_df['cases'].diff(1)
    county_df['delta7'] = county_df['cases'].diff(7)
    county_df['delta14'] = county_df['cases'].diff(14)

    sparse_county_frames.append(county_df)

sparse_county_covid_df = (
    # ignore_index gives the combined frame a fresh 0..n-1 index
    pd.concat(sparse_county_frames, ignore_index=True)
    # the delta columns are float because of the NaNs; convert_dtypes turns
    # them into nullable integers
    .convert_dtypes()
    # make all county and state names lowercase (county names are otherwise
    # kept as they appear in the data)
    .assign(
        county=lambda df: df['county'].str.lower(),
        state=lambda df: df['state'].str.lower(),
    )
    # rename 'cases' to 'total_cases'
    .rename(columns={'cases': 'total_cases'})
)

# save county results to csv (the row index is written as the first column,
# which is the to_csv default)
sparse_county_covid_df.to_csv("sparse_county_covid.csv")

# check results
sparse_county_covid_df.head(30)

Unnamed: 0,date,county,state,total_cases,new_cases,delta7,delta14
0,2020-03-07,fairfax,virginia,1,,,
1,2020-03-08,fairfax,virginia,2,1.0,,
2,2020-03-09,fairfax,virginia,4,2.0,,
3,2020-03-10,fairfax,virginia,4,0.0,,
4,2020-03-11,fairfax,virginia,4,0.0,,
5,2020-03-12,fairfax,virginia,4,0.0,,
6,2020-03-13,fairfax,virginia,6,2.0,,
7,2020-03-14,fairfax,virginia,10,4.0,9.0,
8,2020-03-15,fairfax,virginia,10,0.0,8.0,
9,2020-03-16,fairfax,virginia,10,0.0,6.0,
