# Modern Data Analytics [G0Z39a]

## Project: Covid 19 in the USA

### Load packages

In [15]:
import mda_module_010 as mda

import os
import pandas as pd
import numpy as np

import yfinance as yf

import plotly.graph_objs as go
import plotly.io as pio
import plotly.express as px

import plotly.offline as py
py.init_notebook_mode(connected=True)
# jupyter notebook: notebook
#jupyter lab: plotly_mimetype
pio.renderers.default = 'notebook' 
  
import matplotlib.pyplot as plt

import datetime as dt

import ipywidgets as widgets
from IPython.display import display
from IPython.display import Markdown as md

from sklearn.compose import ColumnTransformer 
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from statsmodels.tsa.seasonal import seasonal_decompose

import warnings
warnings.filterwarnings('ignore')

### Load data

In [16]:
cwd = os.getcwd()
us = pd.read_csv(cwd+"/covid-19-data/us.csv")
counties_df = pd.read_csv(cwd+"/covid-19-data/us-counties.csv")
states_df = pd.read_csv(cwd+"/covid-19-data/us-states.csv")
counties20 = pd.read_csv(cwd+"/covid-19-data/us-counties-2020.csv")
counties21 = pd.read_csv(cwd+"/covid-19-data/us-counties-2021.csv")
counties22 = pd.read_csv(cwd+"/covid-19-data/us-counties-2022.csv")
extra_data = pd.read_csv(cwd+"/data/extra_data.csv")
dfpop = pd.read_csv(cwd+'/data/pop_usa_states.csv')

### Data Info

In [17]:
us.isnull().sum()

date      0
cases     0
deaths    0
dtype: int64

In [18]:
states_df.isnull().sum()

date      0
state     0
fips      0
cases     0
deaths    0
dtype: int64

In [19]:
counties20.isnull().sum()

date          0
county        0
state         0
fips       8266
cases         0
deaths    18761
dtype: int64

In [20]:
counties21.isnull().sum()

date          0
county        0
state         0
fips      10803
cases         0
deaths    28470
dtype: int64

In [21]:
counties21.isnull().sum()

date          0
county        0
state         0
fips      10803
cases         0
deaths    28470
dtype: int64

The US and State datasets were clean. However the county datasets for 2020, 2021 and 2022 all had missing values in the fips and deaths columns.

### Data Pre-Processing

In [22]:
counties20_clean = mda.counties_preprocessing(counties20)
counties21_clean = mda.counties_preprocessing(counties21)
counties22_clean = mda.counties_preprocessing(counties22)

The above function clean and process the county datasets:
1) Treats missing values: If the data were available online (FIPS) data the missing values were replaced. Otherwise rows with missing values were dropped.
2) Splits date variable into year, month and day variables for convinience in the downstream analyses.
3) Adds code data to each State.

### Data Statistics

In [23]:
perstate20 = mda.per_state(counties20_clean)
perstate21 = mda.per_state(counties21_clean)
perstate22 = mda.per_state(counties22_clean)
percounty20 = mda.per_county(counties20_clean)
percounty21 = mda.per_county(counties21_clean)
percounty22 = mda.per_county(counties22_clean)

The above functions group the cleaned data by state and county to be able to get summary statistics.

In [24]:
state_stats20 = pd.DataFrame(counties20_clean.groupby('state').describe())
state_stats21 = pd.DataFrame(counties21_clean.groupby('state').describe())
state_stats22 = pd.DataFrame(counties22_clean.groupby('state').describe())

In [25]:
county_stats20 = pd.DataFrame(counties20_clean.groupby('county').describe())
county_stats21 = pd.DataFrame(counties21_clean.groupby('county').describe())
county_stats22 = pd.DataFrame(counties22_clean.groupby('county').describe())

### Map

In [26]:
mapdf = mda.extra_data_retriever(extra_data, mda.state_per_month(states_df))

The above function creates the dataframe for the map visual below which shows how monthly cases, deaths and vaccinations changed in the US for each state.

In [27]:
def fig_creator(s):
    import plotly.express as px
    
    fig = px.scatter_geo(mapdf, locations="code", locationmode="USA-states", hover_name="state",
                     hover_data=["cases", "deaths", "1_dose", "complete_dose"], size=s, size_max=20,
                     animation_frame="date", projection="albers usa", title="Covid-19 evolution in US per state", width=1000, height=1000)
    fig.show()

widgets.interact(fig_creator,
                 s=widgets.Dropdown(
                     options=[("Cases", "cases"),
                              ("Deaths", "deaths"),
                              ("One Vaccination Dose", "1_dose"),
                              ("Complete Vaccination", "complete_dose")],
                     description='Select:'));

interactive(children=(Dropdown(description='Select:', options=(('Cases', 'cases'), ('Deaths', 'deaths'), ('One…

### Time Series plot

In [28]:
us_timeseries = mda.timeseries_process(us, "us")
state_timeseries = mda.timeseries_process(states_df, "state")
counties = pd.concat([counties20_clean, counties21_clean, counties22_clean])
counties_timeseries = mda.timeseries_process(counties, "county")

In [29]:
dropdown_case = widgets.Dropdown(options=[("Daily Cases", 'daily_cases'), ("Daily Deaths", 'daily_deaths')])
dropdown_level = widgets.Dropdown(options=[('USA','us'),('State','state'), ('County','county')])
dropdown_state = widgets.Dropdown(options=state_timeseries['state'].unique())
dropdown_county = widgets.Dropdown(options=counties_timeseries['county'].unique())
input_widgets = widgets.HBox([dropdown_case, dropdown_level])

output = widgets.Output()
def com_filter(case, level, state, county):
    output.clear_output()
    if level == 'us':
        with output:
            mda.plot(us_timeseries, level="us", y=case)
    elif level == "state":
        with output:
            display(dropdown_state)
            mda.plot(state_timeseries, level="state", y=case, state=state)
    elif level == "county":
        with output:
            display(dropdown_county)
            mda.plot(counties_timeseries, level="county", y=case, county=county)

def dropdown_case_eventhandler(change):
    com_filter(change.new, dropdown_level.value, dropdown_state.value, dropdown_county.value)

def dropdown_level_eventhandler(change):
    com_filter(dropdown_case.value, change.new, dropdown_state.value, dropdown_county.value)    
    
def dropdown_state_eventhandler(change):
    com_filter(dropdown_case.value, dropdown_level.value, change.new, dropdown_county.value)
    
def dropdown_county_eventhandler(change):
    com_filter(dropdown_case.value, dropdown_level.value, dropdown_state.value, change.new)
    
dropdown_case.observe(dropdown_case_eventhandler, names='value')
dropdown_level.observe(dropdown_level_eventhandler, names='value')
dropdown_state.observe(dropdown_state_eventhandler, names='value')
dropdown_county.observe(dropdown_county_eventhandler, names='value')
display(input_widgets)
display(output)

#initial plot
with output:
    mda.plot(us_timeseries, level="us", y='daily_cases')

HBox(children=(Dropdown(options=(('Daily Cases', 'daily_cases'), ('Daily Deaths', 'daily_deaths')), value='dai…

Output()

#### Trend and Seasonality in the US

In [30]:
usa_time = us_timeseries.set_index('date')
usa_time.dropna(inplace=True)
decompose_result_mult = seasonal_decompose(usa_time['daily_cases'],period=365)
trend = decompose_result_mult.trend
seasonality = decompose_result_mult.seasonal
residual = decompose_result_mult.resid

In [31]:
px.line(seasonality)

In [32]:
px.line(trend)

# A look into the Case fatality ratio and the Cause-specific mortality ratio

In [33]:
dfpop.rename(columns={"State":"state"},inplace=True)
dfstates = pd.merge(state_timeseries, dfpop[['state','Pop']], how="left", on="state")

In [34]:
dfstates.head()

Unnamed: 0,date,state,fips,cases,deaths,daily_cases,daily_deaths,Pop
0,2020-03-13,Alabama,1,6,0,0,0,4949697.0
1,2020-03-14,Alabama,1,12,0,6,0,4949697.0
2,2020-03-15,Alabama,1,23,0,11,0,4949697.0
3,2020-03-16,Alabama,1,29,0,6,0,4949697.0
4,2020-03-17,Alabama,1,39,0,10,0,4949697.0


In [35]:
date = '2020-12-15'
print('Cause-specific mortality ratio per 1000 people in the US on {}:'.format(date),
      mda.comp_csr(us_timeseries,date,state=False), 'Per 1000 people')
mda.comp_csr(dfstates,date)


print('Case to fatality ratio in percent in the US on {}:'.format(date),
      mda.comp_cfr(us_timeseries,date,state=False),'%')
mda.comp_cfr(dfstates,date)

Cause-specific mortality ratio per 1000 people in the US on 2020-12-15: 0.923 Per 1000 people


Case to fatality ratio in percent in the US on 2020-12-15: 1.813 %


In [36]:
date = '2022-05-15'
print('Cause-specific mortality ratio per 1000 people in the US on {}:'.format(date),
      mda.comp_csr(us_timeseries,date,state=False),'per 1000 people')
print('Case to fatality ratio in percent in the US on {}:'.format(date),
      mda.comp_cfr(us_timeseries,date,state=False),'%')

Cause-specific mortality ratio per 1000 people in the US on 2022-05-15: 3.03 per 1000 people
Case to fatality ratio in percent in the US on 2022-05-15: 1.211 %


We can see that even tho the cause-specific mortality ratio increased since the begining of the vaccination campaign, the case to fatality decreased of 0.5%. Thus since people have been getting vaccinated there are less covid related deaths

## Some basic numbers 

In [37]:
#state with the highest total cases, deaths 
h_c_state = dfstates[dfstates['cases']==dfstates['cases'].max()]
date = h_c_state['date'].dt.strftime('%Y-%m-%d').values[0]
state = h_c_state['state'].values[0]
cases = h_c_state['cases'].values[0]
h_d_state = dfstates[dfstates['deaths']==dfstates['deaths'].max()]
date_d = h_d_state['date'].dt.strftime('%Y-%m-%d').values[0]
state_d = h_d_state['state'].values[0]
deaths = h_c_state['deaths'].values[0]
print('The state that has the most cases as of {} is {} with a total of {} cases.'.format(date,state,cases))
print('The state that has the most deaths as of {} is {} with a total of {} deaths.'.format(date_d,state_d,deaths))

The state that has the most cases as of 2022-05-23 is California with a total of 9461626 cases.
The state that has the most deaths as of 2022-05-23 is California with a total of 91231 deaths.


In [38]:
last_upd = us_timeseries.iloc[-1]
tot_cases = last_upd['cases']
tot_deaths = last_upd['deaths']
date = last_upd['date'].strftime('%Y-%m-%d')
print('As of {}, {} people were tested positive and {} people died of COVID-19 in the US.'.format(date,tot_cases,tot_deaths))

As of 2022-05-23, 83325014 people were tested positive and 1000826 people died of COVID-19 in the US.


## Let's have a look at some stocks and the impact of covid 

In [39]:
start_date = us_timeseries['date'].min()
end_date = us_timeseries['date'].max()
stocks = mda.get_stocks('^GSPC','^NDX',start_date,end_date,'sp500','nasdaq')
dfsp500 = stocks[0]
dfndx = stocks[1]
dfstocks = stocks[2]

In [40]:
max_y =dfstocks['Close'].max()
fig = px.line(dfstocks,y= 'Close' ,color='name')
fig.add_vline(x='2020-03-13',line_dash="dash") #emergency state in the USA
fig.add_vline(x='2020-12-14',line_dash="dash") #first vaccine in the USA
fig.add_vline(x='2021-06-01',line_dash="dash") #delta variant
fig.add_vline(x='2021-11-26',line_dash="dash") #omicron variant
fig.add_annotation(x='2020-03-13', y=max_y,
            text="Emergency state in the USA",
            showarrow=True,
            arrowhead=1)
fig.add_annotation(x='2020-12-14', y=max_y,
            text="First vaccin in the USA",
            showarrow=True,
            arrowhead=1)
fig.add_annotation(x='2021-06-01', y=max_y,
            text="Delta variant",
            showarrow=True,
            arrowhead=1)
fig.add_annotation(x='2021-11-26', y=max_y,
            text="Omircron variant",
            showarrow=True,
            arrowhead=1)

In [41]:
ndx_changes = mda.changes(dfndx,['2020-03-13','2020-12-14','2021-06-01','2021-11-26'])
sp500_changes = mda.changes(dfsp500,['2020-03-13','2020-12-14','2021-06-01','2021-11-26'])

In [42]:
md("The declaration of the emergency state had a big impact on both s&p500 and nasdaq stock as their values were down by  {}% and {}% respectively. Neither the first vaccin nor the arrival of the delta variant seem to have had an impact   as both stayed on the same trend. The Omicron variant has a small impact on both as their values were down by {}% and {}% after 7 days of the first case."
   .format(round(ndx_changes[0],3),round(sp500_changes[0],3), round(ndx_changes[3],2),round(sp500_changes[3],2)))

The declaration of the emergency state had a big impact on both s&p500 and nasdaq stock as their values were down by  -12.52% and -14.98% respectively. Neither the first vaccin nor the arrival of the delta variant seem to have had an impact   as both stayed on the same trend. The Omicron variant has a small impact on both as their values were down by -1.96% and -1.22% after 7 days of the first case.

### Clustering

In [43]:
df = mapdf.copy(deep=True)

In [44]:
df20_processed, df21_processed, df22_processed = mda.cluster_process(df)

In [45]:
df20_processed.drop("Hawaii", inplace=True)

In [46]:
df20_processed.head()

Unnamed: 0_level_0,latitude,longitude,cases,deaths,1_dose,complete_dose,population,risk_category
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alabama,32.601011,-86.680736,361226,4827,0,0,4903185,Orange
Alaska,61.302501,-158.77502,46740,198,0,0,731545,Orange
Arizona,34.168219,-111.930907,523829,8879,26551,68,7278717,Orange
Arkansas,34.751928,-92.131378,225138,3676,0,0,3017804,Orange
California,37.271875,-119.270415,2307860,25965,87753,322,39512223,Yellow


#### 2020

In [47]:
warnings.filterwarnings('ignore')

clusterdf20, Z20, silhoutte20, ari20 = mda.cluster_algorithm(df20_processed, "KMeans")

def cluster_fig20(selection):
    import plotly.express as px
    
    title_dict = {"cases":"Cases",
                  "deaths":"Deaths",
                  "1_dose":"1 Vaccination Dose",
                  "complete_dose":"Complete Vaccinations"}
    
    fig20 = go.Figure(
    data=go.Scatter(
        x=clusterdf20["PC1"].values,
        y=clusterdf20["PC2"].values,
        text=clusterdf20.index,
        mode='markers',
        marker=go.Marker(
            size=df20_processed[selection],
            sizemode='diameter',
            sizeref=df20_processed[selection].max()/50,
            opacity=1,
            color=Z20
            )
        )
    )

    fig20.update_layout(
        go.Layout(
            title=f'US State Cluster Analysis 2020 (size represents {title_dict[selection]})',
            xaxis=go.XAxis(title="PC1", showgrid=True, zeroline=True, showticklabels=True),
            yaxis=go.YAxis(title="PC2", showgrid=True, zeroline=True, showticklabels=True),
            hovermode='closest'
        )
    )

    fig20.show()

widgets.interact(cluster_fig20,
                 selection=widgets.Dropdown(
                     options=[("Cases", "cases"),
                              ("Deaths", "deaths"),
                              ("1 Vaccination Dose", "1_dose"),
                              ("Complete Vaccinations", "complete_dose")],
                     description='Select:'));

interactive(children=(Dropdown(description='Select:', options=(('Cases', 'cases'), ('Deaths', 'deaths'), ('1 V…

#### 2021

In [48]:
warnings.filterwarnings('ignore')

clusterdf21, Z21, silhoutte21, ari21 = mda.cluster_algorithm(df21_processed, "KMeans")

def cluster_fig21(selection):
    import plotly.express as px
    
    title_dict = {"cases":"Cases",
                  "deaths":"Deaths",
                  "1_dose":"1 Vaccination Dose",
                  "complete_dose":"Complete Vaccinations"}
    
    fig21 = go.Figure(
    data=go.Scatter(
        x=clusterdf21["PC1"].values,
        y=clusterdf21["PC2"].values,
        text=clusterdf21.index,
        mode='markers',
        marker=go.Marker(
            size=df21_processed[selection],
            sizemode='diameter',
            sizeref=df21_processed[selection].max()/50,
            opacity=1,
            color=Z20
            )
        )
    )

    fig21.update_layout(
        go.Layout(
            title=f'US State Cluster Analysis 2021 (size represents {title_dict[selection]})',
            xaxis=go.XAxis(title="PC1", showgrid=True, zeroline=True, showticklabels=True),
            yaxis=go.YAxis(title="PC2", showgrid=True, zeroline=True, showticklabels=True),
            hovermode='closest'
        )
    )

    fig21.show()

widgets.interact(cluster_fig21,
                 selection=widgets.Dropdown(
                     options=[("Cases", "cases"),
                              ("Deaths", "deaths"),
                              ("1 Vaccination Dose", "1_dose"),
                              ("Complete Vaccinations", "complete_dose")],
                     description='Select:'));

interactive(children=(Dropdown(description='Select:', options=(('Cases', 'cases'), ('Deaths', 'deaths'), ('1 V…

#### 2022

In [49]:
warnings.filterwarnings('ignore')

clusterdf22, Z22, silhoutte22, ari22 = mda.cluster_algorithm(df22_processed, "KMeans")

def cluster_fig22(selection):
    import plotly.express as px
    
    title_dict = {"cases":"Cases",
                  "deaths":"Deaths",
                  "1_dose":"1 Vaccination Dose",
                  "complete_dose":"Complete Vaccinations"}
    
    fig22 = go.Figure(
    data=go.Scatter(
        x=clusterdf22["PC1"].values,
        y=clusterdf22["PC2"].values,
        text=clusterdf22.index,
        mode='markers',
        marker=go.Marker(
            size=df22_processed[selection],
            sizemode='diameter',
            sizeref=df22_processed[selection].max()/50,
            opacity=1,
            color=Z20
            )
        )
    )

    fig22.update_layout(
        go.Layout(
            title=f'US State Cluster Analysis 2022 (size represents {title_dict[selection]})',
            xaxis=go.XAxis(title="PC1", showgrid=True, zeroline=True, showticklabels=True),
            yaxis=go.YAxis(title="PC2", showgrid=True, zeroline=True, showticklabels=True),
            hovermode='closest'
        )
    )

    fig22.show()

widgets.interact(cluster_fig22,
                 selection=widgets.Dropdown(
                     options=[("Cases", "cases"),
                              ("Deaths", "deaths"),
                              ("1 Vaccination Dose", "1_dose"),
                              ("Complete Vaccinations", "complete_dose")],
                     description='Select:'));

interactive(children=(Dropdown(description='Select:', options=(('Cases', 'cases'), ('Deaths', 'deaths'), ('1 V…

### ANOVA

In [50]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [51]:
anovadf = mda.lmem_process(mapdf)

In [52]:
anova2020 = anovadf[anovadf["year"]==2020]
anova2021 = anovadf[anovadf["year"]==2021]
anova2022 = anovadf[anovadf["year"]==2022]

In [53]:
temp_df = anova2021.copy(deep=True)
temp_df["month"]-=12
month_dict = {1:"January", 2:"February", 3:"March", 4:"April", 5:"May", 6:"June", 7:"July", 8:"August", 9:"September", 10:"October", 11:"November", 12:"December"}
temp_df["month"] = temp_df["month"].map(month_dict)

def fig_creator2(selection):
    import plotly.express as px
    
    title_dict = {"monthly_cases":"Monthly Cases",
                  "monthly_deaths":"Monthly Deaths",
                  "monthly_1dose":"Monthly Vaccination Dose",
                  "monthly_completedose":"Monthly Complete Vaccinations"}
    
    fig = px.strip(temp_df, x='month', y=selection, color="state", title=f"{title_dict[selection]} per month in 2021",
                   hover_data=["monthly_cases", "monthly_deaths", "monthly_1dose", "monthly_completedose"],
                   labels={selection:title_dict[selection]})
    fig.show()

widgets.interact(fig_creator2,
                 selection=widgets.Dropdown(
                     options=[("Monthly Cases", "monthly_cases"),
                              ("Monthly Deaths", "monthly_deaths"),
                              ("Monthly Vaccination Dose", "monthly_1dose"),
                              ("Monthly Complete Vaccinations", "monthly_completedose")],
                     description='Select:'));

interactive(children=(Dropdown(description='Select:', options=(('Monthly Cases', 'monthly_cases'), ('Monthly D…

In [54]:
temp_df = anova2021.copy(deep=True)
temp_df["month"]-=12

def fig_creator3(selection1, selection2):
    import plotly.express as px
    
    title_dict = {"monthly_cases":"Monthly Cases",
                  "monthly_deaths":"Monthly Deaths",
                  "monthly_1dose":"Monthly Vaccination Dose",
                  "monthly_completedose":"Monthly Complete Vaccinations"}
    
    fig = px.scatter(temp_df[temp_df["state"]==selection2], x="month", y=selection1, trendline="ols",
                     title=f"{title_dict[selection1]} in {selection2} 2021",
                    labels={selection1:title_dict[selection1]})
    fig.show()

selection2_options = []
for s in anova2021["state"].unique():
    selection2_options.append((s,s))

widgets.interact(fig_creator3,
                 
                 selection1=widgets.Dropdown(
                     options=[("Monthly Cases", "monthly_cases"),
                              ("Monthly Deaths", "monthly_deaths"),
                              ("Monthly Vaccination Dose", "monthly_1dose"),
                              ("Monthly Complete Vaccinations", "monthly_completedose")],
                     description='Select:'),
                 
                 selection2=widgets.Dropdown(
                     options=selection2_options,
                     description='Select:'));

interactive(children=(Dropdown(description='Select:', options=(('Monthly Cases', 'monthly_cases'), ('Monthly D…

In [55]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
temp_df = anova2021.copy(deep=True)
temp_df = temp_df[["state", "year", "month", "monthly_cases", "monthly_deaths"]]
temp_df["month"]-=12
month_dict = {1:"January", 2:"February", 3:"March", 4:"April", 5:"May", 6:"June", 7:"July", 8:"August", 9:"September", 10:"October", 11:"November", 12:"December"}
temp_df["month"] = temp_df["month"].map(month_dict)

In [None]:
tempdf_cases = temp_df[["month", "monthly_cases"]]
tempdf_cases["index"] = list(range(49))*12
tempdf_cases = tempdf_cases[["index", "month", "monthly_cases"]]
tempdf_deaths = temp_df[["month", "monthly_deaths"]]
tempdf_deaths["index"] = list(range(49))*12
tempdf_deaths = tempdf_deaths[["index", "month", "monthly_deaths"]]

In [None]:
boxplot = px.box(tempdf_cases, x="month", y="monthly_cases",
                 labels={"monthly_cases":"Monthly Cases", "month":"Month"},
                 color="month", title="Monthly Cases in US")
boxplot.show()

In [None]:
boxplot1 = px.box(tempdf_deaths, x="month", y="monthly_deaths",
                 labels={"monthly_deaths":"Monthly Deaths", "month":"Month"},
                 color="month", title="Monthly Deaths in US")
boxplot1.show()

#### Monthly Cases

In [None]:
# get ANOVA table as R like output
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Ordinary Least Squares (OLS) model
model = ols('monthly_cases ~ month', data=tempdf_cases).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

In [None]:
from bioinfokit.analys import stat
# perform multiple pairwise comparison (Tukey's HSD)
# unequal sample size data, tukey_hsd uses Tukey-Kramer test
res = stat()
res.tukey_hsd(df=tempdf_cases, res_var='monthly_cases', xfac_var='month', anova_model='monthly_cases ~ month')
res.tukey_summary

#### Monthly Deaths

In [None]:
model = ols('monthly_deaths ~ month', data=tempdf_deaths).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

In [None]:
from bioinfokit.analys import stat
# perform multiple pairwise comparison (Tukey's HSD)
# unequal sample size data, tukey_hsd uses Tukey-Kramer test
res = stat()
res.tukey_hsd(df=tempdf_deaths, res_var='monthly_deaths', xfac_var='month', anova_model='monthly_deaths ~ month')
res.tukey_summary