# COVID Case & Death Data Processing & Grouping by Census Division

In [None]:
%matplotlib inline        
import matplotlib.pyplot as plt        
import seaborn; seaborn.set()  # set plot style
from datetime import date
import time
import numpy as np
import pandas as pd        

In [None]:
# USAFacts CSVs: confirmed cases, county population and deaths.
covid_confirmed_cases = pd.read_csv(
    'https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_confirmed_usafacts.csv'
)
covid_county_population = pd.read_csv(
    'https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_county_population_usafacts.csv'
)
covid_deaths = pd.read_csv(
    'https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_deaths_usafacts.csv'
)
# Our World in Data state vaccination CSV; loaded but unused.
us_state_vaccinations = pd.read_csv(
    'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/us_state_vaccinations.csv'
)

In [None]:
# Two-letter state codes grouped into the nine divisions named by the variables
# below; the lists are used as label selectors on per-state Series.
# NOTE(review): "DC" does not appear in any list, although the U.S. Census
# Bureau's South Atlantic division includes the District of Columbia — confirm
# whether the omission is intentional.
new_england = ["CT", "ME", "MA", "NH", "RI", "VT"]
mid_atlantic = ["NJ", "NY", "PA"]
east_north_central = ["IL", "IN", "MI", "OH", "WI"]
west_north_central = ["IA", "KS", "MN", "MO", "NE", "ND", "SD"]
south_atlantic = ["DE", "FL", "GA", "MD", "NC", "SC", "VA", "WV"]
east_south_central = ["AL", "KY", "MS", "TN"]
west_south_central = ["AR", "LA", "OK", "TX"]
mountain = ["AZ", "CO", "ID", "MT", "NV", "NM", "UT", "WY"]
pacific = ["AK", "CA", "HI", "OR", "WA"]

In [None]:
# Keep only rows whose 'County Name' is not 'Statewide Unallocated' in the
# cases, deaths and population tables.
covid_confirmed_cases = covid_confirmed_cases[covid_confirmed_cases['County Name'].ne('Statewide Unallocated')]
covid_deaths = covid_deaths[covid_deaths['County Name'].ne('Statewide Unallocated')]
covid_county_population = covid_county_population[covid_county_population['County Name'].ne('Statewide Unallocated')]

In [None]:
# returns dataframe modified from daily to weekly time frame
def daily_to_weekly(dataframe_name):
    """Aggregate a county-level daily table into per-state weekly maxima.

    Parameters
    ----------
    dataframe_name : pandas.DataFrame
        Table with 'countyFIPS', 'StateFIPS', 'County Name' and 'State'
        columns plus one column per day named 'YYYY-MM-DD'. The columns
        '2020-01-22' through '2020-01-26' must be present; they are dropped.

    Returns
    -------
    pandas.DataFrame
        One row per Monday-to-Sunday week, indexed by that week's Monday
        (index name 'Weeks'). There is one column per state holding the
        largest daily state total seen in the week, followed by a final
        'Weeks' column holding the latest date present in that week.

    Raises
    ------
    KeyError
        If any of the dropped columns is missing from ``dataframe_name``.
    """
    # Identifier columns and the days before Monday 2020-01-27 are discarded,
    # so the remaining daily columns start on a week boundary.
    discarded = [
        'countyFIPS', 'StateFIPS', 'County Name',
        '2020-01-22', '2020-01-23', '2020-01-24', '2020-01-25', '2020-01-26',
    ]
    state_daily = dataframe_name.drop(columns=discarded).groupby('State').sum()

    # One row per day, one column per state, indexed by the parsed date.
    by_date = state_daily.T
    by_date.index = pd.to_datetime(by_date.index)
    by_date.index.name = 'Weeks'

    # Callers filter on a 'Weeks' column and strip it positionally as the last
    # column. Carrying the date as an ordinary column and resampling on the
    # index guarantees that layout, instead of depending on whether
    # resample(on=...) happens to keep its key column in the result.
    by_date['Weeks'] = by_date.index

    return by_date.resample('W-MON', label='left', closed='left').max()

In [None]:
# returns cases/deaths in designated time frame
def get_by_period(df, start_date, end_date):
    """Return the rows of ``df`` whose 'Weeks' value lies in a date range.

    ``start_date`` and ``end_date`` are both inclusive bounds and are compared
    against the 'Weeks' column of ``df``.
    """
    in_window = df["Weeks"].between(start_date, end_date)
    return df.loc[in_window]

In [None]:
# Sum of the 'population' column over every row of covid_county_population.
# (Original author's note: territories population numbers are not included.)
covid_county_population_total = covid_county_population['population'].sum()

# Per-state population totals. numeric_only=True keeps text columns such as
# 'County Name' out of the aggregation; where groupby sum defaults to
# numeric_only=False those strings would be concatenated into an extra column.
# The summed 'countyFIPS' identifier carries no meaning and is dropped.
covid_county_population_total_by_state = (
    covid_county_population
    .groupby('State')
    .sum(numeric_only=True)
    .drop(columns='countyFIPS')
)

**Cases**

In [None]:
# Per-state weekly table built from the county-level confirmed-cases table.
cases_states_weekly_total = daily_to_weekly(covid_confirmed_cases)

# Weekly rows falling inside each of the two date windows.
cases_fall = get_by_period(cases_states_weekly_total, "2020-06-01", "2020-12-31")
cases_spr = get_by_period(cases_states_weekly_total, "2021-1-1", "2021-7-1")

# Window total per state = last weekly row minus first weekly row, taken over
# every column except the final one (the trailing 'Weeks' column), stored as a
# one-column frame labelled 'cases'.
# NOTE(review): the first row is itself inside the window, so growth during
# that first week is not part of the difference — confirm this is intended.
total_cases_fall = pd.DataFrame(
    cases_fall.iloc[-1, :-1] - cases_fall.iloc[0, :-1],
    columns=['cases'],
)
total_cases_spr = pd.DataFrame(
    cases_spr.iloc[-1, :-1] - cases_spr.iloc[0, :-1],
    columns=['cases'],
)

In [None]:
# Sum of the per-state 'cases' column for each window, one value per line.
print(
    total_cases_fall['cases'].sum(),
    total_cases_spr['cases'].sum(),
    sep="\n",
)

16756968
12617442


In [None]:
# Window case totals as a percentage of each state's population
# (the two Series are aligned on their state labels).
covid_percentage_fall = total_cases_fall['cases'] / covid_county_population_total_by_state['population'] * 100
covid_percentage_spr = total_cases_spr['cases'] / covid_county_population_total_by_state['population'] * 100

In [None]:
# Prints the same two window totals as the earlier totals cell.
print(
    total_cases_fall['cases'].sum(),
    total_cases_spr['cases'].sum(),
    sep="\n",
)

In [None]:
# Unweighted mean of the per-state case percentages (covid_percentage_fall)
# over the states listed for each division.
new_england_covid_percentage = covid_percentage_fall.loc[new_england].mean()
mid_atlantic_covid_percentage = covid_percentage_fall.loc[mid_atlantic].mean()
east_north_central_covid_percentage = covid_percentage_fall.loc[east_north_central].mean()
west_north_central_covid_percentage = covid_percentage_fall.loc[west_north_central].mean()
south_atlantic_covid_percentage = covid_percentage_fall.loc[south_atlantic].mean()
east_south_central_covid_percentage = covid_percentage_fall.loc[east_south_central].mean()
west_south_central_covid_percentage = covid_percentage_fall.loc[west_south_central].mean()
mountain_covid_percentage = covid_percentage_fall.loc[mountain].mean()
pacific_covid_percentage = covid_percentage_fall.loc[pacific].mean()

# One value per line, in the order the divisions are assigned above.
print(
    new_england_covid_percentage,
    mid_atlantic_covid_percentage,
    east_north_central_covid_percentage,
    west_north_central_covid_percentage,
    south_atlantic_covid_percentage,
    east_south_central_covid_percentage,
    west_south_central_covid_percentage,
    mountain_covid_percentage,
    pacific_covid_percentage,
    sep="\n",
)

2.87695543122557
3.4372237388163573
6.33303941595009
8.091565578753393
4.451383229853076
6.485332182535187
6.157937392531796
6.6513582043307125
3.4804517410057656


In [None]:
# Unweighted mean of the per-state case percentages (covid_percentage_spr)
# over the states listed for each division; the same variable names are
# reassigned with the second window's values.
new_england_covid_percentage = covid_percentage_spr.loc[new_england].mean()
mid_atlantic_covid_percentage = covid_percentage_spr.loc[mid_atlantic].mean()
east_north_central_covid_percentage = covid_percentage_spr.loc[east_north_central].mean()
west_north_central_covid_percentage = covid_percentage_spr.loc[west_north_central].mean()
south_atlantic_covid_percentage = covid_percentage_spr.loc[south_atlantic].mean()
east_south_central_covid_percentage = covid_percentage_spr.loc[east_south_central].mean()
west_south_central_covid_percentage = covid_percentage_spr.loc[west_south_central].mean()
mountain_covid_percentage = covid_percentage_spr.loc[mountain].mean()
pacific_covid_percentage = covid_percentage_spr.loc[pacific].mean()

# One value per line, in the order the divisions are assigned above.
print(
    new_england_covid_percentage,
    mid_atlantic_covid_percentage,
    east_north_central_covid_percentage,
    west_north_central_covid_percentage,
    south_atlantic_covid_percentage,
    east_south_central_covid_percentage,
    west_south_central_covid_percentage,
    mountain_covid_percentage,
    pacific_covid_percentage,
    sep="\n",
)

4.104581590152892
5.085395401215568
3.422274059230989
2.9094031082019614
4.108145517436194
3.7419580558513004
3.7802309974428185
3.403795555115627
2.471691099996717


**Deaths**

In [None]:
# converting daily to weekly time reference
deaths_states_weekly_total = daily_to_weekly(covid_deaths)

deaths_fall = get_by_period(deaths_states_weekly_total, "2020-06-01", "2020-12-31")
deaths_spr = get_by_period(deaths_states_weekly_total, "2021-1-1", "2021-7-1")

total_deaths_fall = deaths_fall.iloc[-1 ,:-1] - deaths_fall.iloc[0 ,:-1]
total_deaths_spr = deaths_spr.iloc[-1 ,:-1] - deaths_spr.iloc[0 ,:-1]

total_deaths_fall = pd.DataFrame(total_deaths_fall, columns=['cases'])
total_deaths_spr = pd.DataFrame(total_deaths_spr, columns=['cases'])

In [None]:
# Window death totals (stored under the 'cases' label) as a percentage of each
# state's population; the two Series are aligned on their state labels.
deaths_percentage_fall = total_deaths_fall['cases'] / covid_county_population_total_by_state['population'] * 100
deaths_percentage_spr = total_deaths_spr['cases'] / covid_county_population_total_by_state['population'] * 100

In [None]:
# Unweighted mean of the per-state death percentages (deaths_percentage_fall)
# over the states listed for each division. The *_covid_percentage variable
# names are reused here for death percentages.
new_england_covid_percentage = deaths_percentage_fall.loc[new_england].mean()
mid_atlantic_covid_percentage = deaths_percentage_fall.loc[mid_atlantic].mean()
east_north_central_covid_percentage = deaths_percentage_fall.loc[east_north_central].mean()
west_north_central_covid_percentage = deaths_percentage_fall.loc[west_north_central].mean()
south_atlantic_covid_percentage = deaths_percentage_fall.loc[south_atlantic].mean()
east_south_central_covid_percentage = deaths_percentage_fall.loc[east_south_central].mean()
west_south_central_covid_percentage = deaths_percentage_fall.loc[west_south_central].mean()
mountain_covid_percentage = deaths_percentage_fall.loc[mountain].mean()
pacific_covid_percentage = deaths_percentage_fall.loc[pacific].mean()

# One value per line, in the order the divisions are assigned above.
print(
    new_england_covid_percentage,
    mid_atlantic_covid_percentage,
    east_north_central_covid_percentage,
    west_north_central_covid_percentage,
    south_atlantic_covid_percentage,
    east_south_central_covid_percentage,
    west_south_central_covid_percentage,
    mountain_covid_percentage,
    pacific_covid_percentage,
    sep="\n",
)

0.046374580827569685
0.05905766822073005
0.07076216023354316
0.09741081763518489
0.06151975877044149
0.0857277100217408
0.08561815976973948
0.07097546105114794
0.029647461990574886


In [None]:
# Unweighted mean of the per-state death percentages (deaths_percentage_spr)
# over the states listed for each division. The *_covid_percentage variable
# names are reused here for death percentages.
new_england_covid_percentage = deaths_percentage_spr.loc[new_england].mean()
mid_atlantic_covid_percentage = deaths_percentage_spr.loc[mid_atlantic].mean()
east_north_central_covid_percentage = deaths_percentage_spr.loc[east_north_central].mean()
west_north_central_covid_percentage = deaths_percentage_spr.loc[west_north_central].mean()
south_atlantic_covid_percentage = deaths_percentage_spr.loc[south_atlantic].mean()
east_south_central_covid_percentage = deaths_percentage_spr.loc[east_south_central].mean()
west_south_central_covid_percentage = deaths_percentage_spr.loc[west_south_central].mean()
mountain_covid_percentage = deaths_percentage_spr.loc[mountain].mean()
pacific_covid_percentage = deaths_percentage_spr.loc[pacific].mean()

# One value per line, in the order the divisions are assigned above.
print(
    new_england_covid_percentage,
    mid_atlantic_covid_percentage,
    east_north_central_covid_percentage,
    west_north_central_covid_percentage,
    south_atlantic_covid_percentage,
    east_south_central_covid_percentage,
    west_south_central_covid_percentage,
    mountain_covid_percentage,
    pacific_covid_percentage,
    sep="\n",
)

0.0545208786569808
0.08237951788293428
0.07260541709449206
0.05412796117231704
0.07732050809543047
0.09930961328330697
0.08553192103418158
0.06355509683974972
0.03872182862225765
