# Analysis of Excess Deaths & Vaccine Status by US States

In [53]:
import pandas as pd
import numpy as np
import datetime
import calendar
import requests
import os

import statsmodels.formula.api as smf
import statsmodels.api as sm

In [54]:
start_date = datetime.date(2022, 1, 1)

In [55]:
end_date = datetime.date(2022, 10, 1)

### Data Sources

In [56]:
# https://data.cdc.gov/NCHS/Excess-Deaths-Associated-with-COVID-19/xkkf-xrst
excess_deaths = "Excess_Deaths_Associated_with_COVID-19.csv"
vaccinated = "https://github.com/owid/covid-19-data/raw/master/public/data/vaccinations/us_state_vaccinations.csv"
state_demographics = "https://corgis-edu.github.io/corgis/datasets/csv/state_demographics/state_demographics.csv"

In [57]:
ef = pd.read_csv(excess_deaths)

In [58]:
ef['Excess Estimate'] = pd.to_numeric(ef['Excess Estimate'], errors='coerce').fillna(0)

In [59]:
ef['Week Ending Date'] = pd.to_datetime(ef['Week Ending Date'], errors='coerce').dt.date

In [60]:
date_mask = (ef['Week Ending Date'] >= start_date) & (ef['Week Ending Date'] <= end_date)
cause_mask = (ef['Outcome'] == 'All causes, excluding COVID-19')
sum(date_mask), sum(cause_mask)

(6480, 16524)

In [61]:
ef = ef[date_mask & cause_mask]

In [62]:
ef = ef.groupby('State').sum()

In [63]:
ef = ef['Excess Estimate'].to_frame()

### State Demographics

In [64]:
df = pd.read_csv(state_demographics)

In [65]:
df = df.set_index('State')

In [66]:
df = df.rename(columns={'Population.2014 Population':'Population', 'Age.Percent 65 and Older':'Pop65', 
                        'Income.Per Capita Income':'Income', 'Income.Persons Below Poverty Level':'Poor',
                       'Population.Population per Square Mile':'PopDensity'})

In [67]:
df = df['Population'].to_frame()

## Vaccinated

In [68]:
vf = pd.read_csv(vaccinated)

In [69]:
vf = vf.groupby('location').max()
vf = vf['people_fully_vaccinated_per_hundred'].to_frame()

In [70]:
vf.columns = ['Vaccinated']

## Merge

In [71]:
df = df.merge(vf, how='left', right_index=True, left_index=True)

In [72]:
df = df.merge(ef, how='left', right_index=True, left_index=True)

In [73]:
df['ExcessPer100k'] = df['Excess Estimate'] * 100000 / df['Population']

In [74]:
df['Constant'] = 1

## Model

In [75]:
mask = df['ExcessPer100k'] > 0
model = sm.OLS(df[mask]['ExcessPer100k'], df[mask][['Vaccinated', 'Constant']], missing='drop').fit()
model.summary()

0,1,2,3
Dep. Variable:,ExcessPer100k,R-squared:,0.006
Model:,OLS,Adj. R-squared:,-0.014
Method:,Least Squares,F-statistic:,0.3063
Date:,"Fri, 25 Nov 2022",Prob (F-statistic):,0.583
Time:,11:19:43,Log-Likelihood:,-217.12
No. Observations:,50,AIC:,438.2
Df Residuals:,48,BIC:,442.1
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Vaccinated,0.1567,0.283,0.553,0.583,-0.413,0.726
Constant,19.6419,19.215,1.022,0.312,-18.992,58.276

0,1,2,3
Omnibus:,25.727,Durbin-Watson:,1.97
Prob(Omnibus):,0.0,Jarque-Bera (JB):,54.082
Skew:,1.463,Prob(JB):,1.8e-12
Kurtosis:,7.171,Cond. No.,485.0
