In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np

In [None]:
# Load the 2021 state populations. The file stores them in millions, so a
# 'Population' column holding the absolute head count is derived, and only
# the state name and that column are kept.
pop_df = (
    pd.read_csv('state_pops_2021.csv')
    .assign(Population=lambda frame: frame['Population (in millions)'] * 1000000)
    .loc[:, ['State', 'Population']]
)
pop_df.head()

In [None]:
# Load the 2020 election results and keep one row per state with the called result.
# NOTE(review): the first four rows of the file are skipped; presumably they are
# not state-level rows - confirm against the CSV.
el_df = (
    pd.read_csv('ElectionResults2020.csv')
    .iloc[4:]
    .loc[:, ['state', 'called']]
    .rename(columns={'state': 'State', 'called': '2020 Election Result'})
)
el_df.head()

In [None]:
# Attach each state's population to its election result.
# Default (inner) join: states missing from either table are dropped.
el_pop_df = el_df.merge(pop_df, on='State')
el_pop_df.head()

In [None]:
# Download the COVID-19 data set from data.world and preview the first rows.
covid_url = 'https://query.data.world/s/254wzyu352gsbcze7tu3pckdjuuhm3'
covid_df = pd.read_csv(covid_url)
covid_df.head()

In [None]:
# Narrow the data set to United States rows only.
us_df = covid_df[covid_df['COUNTRY_SHORT_NAME'] == 'United States']

# The counts are cumulative, so only the most recent report date is needed.
# NOTE(review): REPORT_DATE is compared exactly as read from the CSV; if it is a
# string column, "latest" is only correct for a sortable format such as
# YYYY-MM-DD - confirm against the data.
latest_report_date = us_df['REPORT_DATE'].max()
today_df = us_df[us_df['REPORT_DATE'] == latest_report_date]

# Total the case and death counts per state. Only the two count columns are
# aggregated: summing every column of the raw frame would also "sum" its
# non-numeric columns, which is wasteful and fragile.
count_cols = ['PEOPLE_POSITIVE_CASES_COUNT', 'PEOPLE_DEATH_COUNT']
state_grouped_df = (
    today_df
    .groupby('PROVINCE_STATE_NAME')[count_cols]
    .sum()
    .reset_index()
)

# Rename to the short column names used in the rest of the notebook.
state_covid_df = state_grouped_df.rename(columns={'PROVINCE_STATE_NAME': 'State',
                                                  'PEOPLE_POSITIVE_CASES_COUNT': 'Cases',
                                                  'PEOPLE_DEATH_COUNT': 'Deaths'})

# Deaths as a percentage of confirmed cases.
state_covid_df['% Deaths per Case'] = state_covid_df['Deaths'] / state_covid_df['Cases'] * 100

# display
state_covid_df.head()

In [None]:
# Combine the per-state COVID totals with election result and population.
# Default (inner) join on the state name.
df = state_covid_df.merge(el_pop_df, on='State')
# display
df.head()

In [None]:
# Express cumulative cases and deaths as a percentage of each state's population.
population = df['Population']
df['% Cases'] = df['Cases'].div(population).mul(100)
df['% Deaths'] = df['Deaths'].div(population).mul(100)
df.head()

In [None]:
# State vs. cases as a percentage of population - bar chart.
# The figure size has to be set BEFORE the first plotting call: rcParams are read
# when the figure is created, so setting them after plt.bar() would leave this
# chart at the default size and only affect later figures.
plt.rcParams["figure.figsize"] = (15,5)
plt.bar(df['State'],df['% Cases'])
plt.title("US States: COVID-19 Cases as % of Total Population")
plt.xticks(rotation = 90)  # state names are long; rotate so they do not overlap
plt.yticks(np.arange(0, 40, 5))
plt.ylabel("COVID-19 Cases as % of Total Population")
plt.show()

In [None]:
# State vs. deaths as a percentage of population - bar chart.
plt.bar(df['State'],df['% Deaths'])
plt.title("Deaths due to COVID-19 as a % of Total Population for each US State")
plt.xticks(rotation = 90)  # state names are long; rotate so they do not overlap
plt.yticks(np.arange(0,0.5, 0.05))
# The plotted column is a percentage of the population, not a raw death count,
# so the axis label says so.
plt.ylabel("COVID-19 Deaths as % of Total Population")
plt.show()

In [None]:
# Which states had the highest share of deaths among their COVID-19 cases?
state_names = df['State']
deaths_per_case_pct = df['% Deaths per Case']

plt.bar(state_names, deaths_per_case_pct)
plt.xticks(rotation=90)
plt.ylabel("% Deaths Per COVID-19 Case")
plt.title("US States, COVID-19 Mortality Rate")
plt.show()

In [None]:
# Average the per-state metrics within each 2020 election result.
# numeric_only=True is required: df still carries the string 'State' column, and
# since pandas 2.0 a group-wise mean over non-numeric columns raises a TypeError
# instead of silently dropping them.
grouped_el_df = df.groupby('2020 Election Result').mean(numeric_only=True)
# Keep only the three percentage metrics used by the charts below, and turn the
# election result back into a regular column.
group_df = grouped_el_df[['% Cases', '% Deaths', '% Deaths per Case']]
group_df = group_df.reset_index()
group_df


In [None]:
# Average % of population infected, by 2020 election result.
# NOTE(review): the bar labels are hard-coded, which assumes group_df holds the
# Democratic row first and the Republican row second - confirm.
party_names = ['Democrat', 'Republican']
avg_case_pct = group_df['% Cases']

plt.rcParams["figure.figsize"] = (5,5)
plt.bar(party_names, avg_case_pct)
plt.title("% COVID-19 Cases in States by 2020 Election Results")
plt.yticks(np.arange(0,40,5))
plt.show()


In [None]:
# Deaths and mortality, compared between Democratic and Republican states.
# Set the figure size before the first plotting call; rcParams are only read
# when the figure is created.
plt.rcParams["figure.figsize"] = (5,5)

labels = ['% Deaths', 'Mortality Rate']           # x tick label per metric
metric_cols = ['% Deaths', '% Deaths per Case']   # group_df column per metric
x = np.arange(len(labels))
width = 0.4

# group_df holds one row per election result and one column per metric. Each
# bar series must therefore be one ROW (a party) read across the metric columns;
# plotting a column instead would put the two parties on the x axis while the
# tick labels and legend claim the opposite.
# NOTE(review): assumes row 0 is the Democratic group and row 1 the Republican
# group (the alphabetical order produced by groupby) - confirm.
party_metrics = group_df[metric_cols]
democratic_values = party_metrics.iloc[0].to_numpy()
republican_values = party_metrics.iloc[1].to_numpy()

# plot data in grouped manner of bar type: one pair of bars per metric
plt.bar(x-0.2, democratic_values, width, color='cyan', label='Democratic States')
plt.bar(x+0.2, republican_values, width, color='green', label='Republican States')
plt.xticks(x, labels)
plt.title("COVID-19 Deaths in States grouped by 2020 Election Results")
plt.ylabel("Percentage")
plt.legend()
plt.show()

