In [3]:
# Dependencies
import pandas as pd
import numpy as np
import warnings

warnings.simplefilter(action='ignore',category=FutureWarning)

In [4]:
# Reading in covid data
covid_df = pd.read_csv("us_counties_covid19_daily.csv")
covid_df = covid_df.drop(['fips','deaths'],1)
covid_df['county']=covid_df['county'].apply(str.lower)
covid_df['state']=covid_df['state'].apply(str.lower)
covid_df.head()

Unnamed: 0,date,county,state,cases
0,2020-01-21,snohomish,washington,1
1,2020-01-22,snohomish,washington,1
2,2020-01-23,snohomish,washington,1
3,2020-01-24,cook,illinois,1
4,2020-01-24,snohomish,washington,1


In [5]:
# Reading in county pop data
dense_county_df = pd.read_csv("dense_county_pop.csv",index_col=0)
dense_county_df = dense_county_df.drop(columns=['avg_household','total_pop','pop_dens(/sqmi)','nchs_code_2013'])
cols = ['state','county']
dense_county_df = dense_county_df[cols]

sparse_county_df = pd.read_csv("sparse_county_pop.csv",index_col=0)
sparse_county_df = sparse_county_df.drop(columns=['avg_household','total_pop','pop_dens(/sqmi)','nchs_code_2013'])
cols = ['state','county']
sparse_county_df = sparse_county_df[cols]

In [6]:
# Creating lists
dense_state_county = dense_county_df.values.tolist()
print(dense_state_county)

sparse_state_county = sparse_county_df.values.tolist()
print(sparse_state_county)

[['california', 'san francisco'], ['district of columbia', 'district of'], ['massachusetts', 'suffolk'], ['new jersey', 'hudson'], ['new york', 'bronx'], ['new york', 'new york'], ['new york', 'queens'], ['pennsylvania', 'philadelphia'], ['virginia', 'arlington'], ['virginia', 'alexandria']]
[['california', 'orange'], ['colorado', 'denver'], ['new jersey', 'middlesex'], ['new york', 'nassau'], ['florida', 'pinellas'], ['illinois', 'dupage'], ['maryland', 'baltimore'], ['new jersey', 'essex'], ['minnesota', 'ramsey'], ['missouri', 'st. louis'], ['new jersey', 'bergen'], ['new jersey', 'passaic'], ['ohio', 'cuyahoga'], ['virginia', 'fairfax'], ['virginia', 'fairfax'], ['virginia', 'falls church'], ['virginia', 'fredericksburg'], ['virginia', 'hampton'], ['virginia', 'manassas'], ['virginia', 'manassas park'], ['virginia', 'newport news'], ['virginia', 'norfolk'], ['virginia', 'portsmouth'], ['virginia', 'richmond'], ['wisconsin', 'milwaukee']]


In [11]:
# Process data for each dense county

# Create new DataFrame to bring data for each county together
dense_county_covid_df = pd.DataFrame()

for state, county in dense_state_county:
    # Get the correct county data
    county_df = covid_df[(covid_df['state'] == state) & (covid_df['county'] == county)].reset_index().drop(['index'],1)

    # new_cases column
    # record the number of new cases for today
    county_df['new_cases'] = pd.Series(dtype=int)

    # Set new_cases for first row = total_cases
    county_df.iloc[0,4] = county_df.iloc[0,3]

    for i in range(1,len(county_df.index)):
        current_cases = county_df.iloc[i,3]
        previous_cases = county_df.iloc[i-1,3]

        county_df.iloc[i,4] = current_cases - previous_cases

    # delta7 column
    # record the change in number of cases from 7 days ago to today
    county_df['future_delta7'] = pd.Series(dtype=int)

    for i in range(0, len(county_df.index)-7):
        current_cases = county_df.iloc[i,3]
        future_cases = county_df.iloc[i+7,3]

        county_df.iloc[i,5] = future_cases - current_cases
    
    # delta14 column
    # record the change in number of cases from 7 days ago to today
    county_df['future_delta14'] = pd.Series(dtype=int)

    for i in range(0,len(county_df.index)-14):
        current_cases = county_df.iloc[i,3]
        future_cases = county_df.iloc[i+14,3]

        county_df.iloc[i,6] = future_cases - current_cases
    
    # remove secondary words like 'city' from county names
    for i in range(0,len(county_df.index)):
        county_df.iloc[i,1] = county.split(' ')[0]

    # Add to final_df
    dense_county_covid_df = pd.concat([dense_county_covid_df,county_df])

# Convert float values to int
dense_county_covid_df = dense_county_covid_df.convert_dtypes()
dense_county_covid_df.head(30)

# make all county and state names lowercase, remove secondary words like 'city'
dense_county_covid_df['county'] = dense_county_covid_df['county'].str.lower()
dense_county_covid_df['state'] = dense_county_covid_df['state'].str.lower()

# rename 'cases' to 'total_cases'
dense_county_covid_df = dense_county_covid_df.rename(columns={'cases':'total_cases'})

# reset index
dense_county_covid_df = dense_county_covid_df.reset_index().drop(['index'],1)

# save county results to csv
dense_county_covid_df.to_csv("data/covid/dense_county_covid.csv")

# check results
dense_county_covid_df.head(30)

KeyError: "['index'] not found in axis"

In [None]:
# Verify end of data
dense_county_covid_df.tail(30)

In [12]:
# Process data for each sparse county

# Create new DataFrame to bring data for each county together
sparse_county_covid_df = pd.DataFrame()

for state, county in sparse_state_county:
    # Get the correct county data
    county_df = covid_df[(covid_df['state'] == state) & (covid_df['county'] == county)].reset_index().drop(['index'],1)

    # new_cases column
    # record the number of new cases for today
    county_df['new_cases'] = pd.Series(dtype=int)

    # Set new_cases for first row = total_cases
    county_df.iloc[0,4] = county_df.iloc[0,3]

    for i in range(1,len(county_df.index)):
        current_cases = county_df.iloc[i,3]
        previous_cases = county_df.iloc[i-1,3]

        county_df.iloc[i,4] = current_cases - previous_cases

    # delta7 column
    # record the change in number of cases from 7 days ago to today
    county_df['future_delta7'] = pd.Series(dtype=int)

    for i in range(0, len(county_df.index)-7):
        current_cases = county_df.iloc[i,3]
        future_cases = county_df.iloc[i+7,3]

        county_df.iloc[i,5] = future_cases - current_cases
    
    # delta14 column
    # record the change in number of cases from 7 days ago to today
    county_df['future_delta14'] = pd.Series(dtype=int)

    for i in range(0,len(county_df.index)-14):
        current_cases = county_df.iloc[i,3]
        future_cases = county_df.iloc[i+14,3]

        county_df.iloc[i,6] = future_cases - current_cases

    # Add to final_df
    sparse_county_covid_df = pd.concat([sparse_county_covid_df,county_df])

# Convert float values to int
sparse_county_covid_df = sparse_county_covid_df.convert_dtypes()
sparse_county_covid_df.head(30)

# make all county and state names lowercase, remove secondary words like 'city'
sparse_county_covid_df['county'] = sparse_county_covid_df['county'].str.lower()
sparse_county_covid_df['state'] = sparse_county_covid_df['state'].str.lower()

# rename 'cases' to 'total_cases'
sparse_county_covid_df = sparse_county_covid_df.rename(columns={'cases':'total_cases'})

# reset index
sparse_county_covid_df = sparse_county_covid_df.reset_index().drop(['index'],1)

# save county results to csv
sparse_county_covid_df.to_csv("data/covid/sparse_county_covid.csv")

# check results
sparse_county_covid_df.head(30)

IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
# Verify end of data
sparse_county_covid_df.tail(30)