In [None]:
import os
import json
import gzip
import random
import logging

import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as ss
import statsmodels.formula.api as smf

import matplotlib as mp
%matplotlib inline
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm, trange

from IPython.display import display

In [None]:
# Notebook-level logger plus a root logging configuration: every record is
# prefixed with its timestamp and level, and INFO and above are emitted.
logger = logging.getLogger(__name__)

# NOTE: the name `fmt` is rebound to a matplotlib tick formatter by the
# plotting cells further down.
fmt = '%(asctime)s : %(levelname)s : %(message)s'
logging.basicConfig(format=fmt, level=logging.INFO)

In [None]:
# Every data path below is relative, so switch the working directory to the
# project checkout under the user's home directory.
# NOTE(review): this location is hard-coded; the cell raises on a machine
# where the repository lives somewhere else.
os.chdir(os.path.expanduser('~/github/masthesis/'))

In [None]:
# Fixed seed applied to both the stdlib and the NumPy global random number
# generators so that random draws are repeatable between runs.
seed = 2969591811

random.seed(seed)
np.random.seed(seed)

In [None]:
# Ten-colour seaborn "colorblind" palette; one fixed entry per data source so
# that a source is drawn in the same colour in every figure.
colors = sns.color_palette('colorblind', 10)

elite_color = colors[3]
radio_color = colors[0]
decahose_color = colors[9]
decahose2_color = colors[4]

# Load data

In [None]:
def _read_story_stats(path):
    """Read a story-level stats CSV indexed by ``story_id`` and check that the index is unique."""
    frame = pd.read_csv(path, index_col='story_id')
    assert frame.index.is_unique
    return frame


# Story-level statistics: one file over all items, plus one file per
# ideological side (liberal / conservative).
stats_overall = _read_story_stats('data/paper-round-3/event-annotated/auto-story-stats.csv')
stats_lib = _read_story_stats('data/paper-round-3/event-annotated/auto-story-stats-lib.csv')
stats_con = _read_story_stats('data/paper-round-3/event-annotated/auto-story-stats-con.csv')

# Liberal rows stacked on top of conservative rows.
stats = pd.concat([stats_lib, stats_con], axis=0)

In [None]:
# Filter list of stories, keyed by a unique `story_id`.
selected = pd.read_csv('data/paper-round-3/event-annotated/auto-sample-communities-filter-list.csv', index_col='story_id')
assert selected.index.is_unique

# Flag decahose stories from 2020 whose `covid` value reaches the threshold.
covid_threshold = 0.1
selected['decahose_covid_2020'] = (selected['year'] == 2020) & (selected['kind'] == 'decahose') & (selected['covid'] >= covid_threshold)

# Copy the flag onto the stats frames. Stories that are absent from
# `selected` get False directly during the reindex, so the column keeps a
# real boolean dtype. Assigning first and calling `.fillna(False)` afterwards
# goes through an object-dtype column: that relies on pandas' deprecated
# silent downcasting, and if the column stays object-typed a later
# `~column` is element-wise integer inversion (`~True == -2`) rather than a
# logical NOT.
stats['decahose_covid_2020'] = selected['decahose_covid_2020'].reindex(stats.index, fill_value=False)
stats_overall['decahose_covid_2020'] = selected['decahose_covid_2020'].reindex(stats_overall.index, fill_value=False)

## Quality filter

In [None]:
# Row filters over `stats_overall`: a minimum `count` of 20, membership in
# the `selected` filter list, and any kind other than 'decahose'.
length_mask_overall = (stats_overall['count'] >= 20)
selected_mask_overall = stats_overall.index.isin(selected.index)
kind_mask_overall = (stats_overall['kind'] != 'decahose')

mask_overall = (
    length_mask_overall
    & kind_mask_overall
    & selected_mask_overall
)

# Per-side filters: the overall filter plus a minimum per-side `count` of 10.
# NOTE(review): `&` aligns the operands on their index labels, so this
# presumably expects `stats_lib` / `stats_con` to carry the same story ids as
# `stats_overall` -- confirm.
mask_lib = mask_overall & (stats_lib['count'] >= 10)  # must be >= 2 to avoid nans in SDs
mask_con = mask_overall & (stats_con['count'] >= 10)

# Stacked in the same lib-then-con order used to build `stats`.
mask = pd.concat([mask_lib, mask_con], axis=0)

# Looser overall filter: `count` threshold of 10 instead of 20.
mask_overall_10 = (
    (stats_overall['count'] >= 10)
    & kind_mask_overall
    & selected_mask_overall
)

# Same as `mask_overall_10` but without the kind filter, so decahose rows
# are kept.
dh_mask_overall_10 = (
    (stats_overall['count'] >= 10)
    & selected_mask_overall
)

# Cell output: number of rows passing `mask`, and total rows in `stats`.
mask.sum(), stats.shape[0]

## Item-level data

In [None]:
# Item-level sample: rows keyed by a unique `id`, `timestamp` parsed as dates.
item_path = 'data/paper-round-3/event-annotated/auto-sample.csv.gz'
with gzip.open(item_path, 'rt') as handle:
    dat = pd.read_csv(handle, parse_dates=['timestamp'], index_col='id')

assert dat.index.is_unique

dat.shape

In [None]:
# Number of items per source kind, restricted to rows with `year` before 2022.
dat.loc[dat['year'] < 2022].groupby('kind').size()

In [None]:
# Community assignments, keyed by the same kind of `id` index as the items.
comms_path = 'data/paper-round-3/event-annotated/auto-sample-communities-merged-pre-filter.csv.gz'
with gzip.open(comms_path, 'rt') as handle:
    comms = pd.read_csv(handle, index_col='id')

assert comms.index.is_unique
assert not comms['year'].isna().any()

comms.shape

In [None]:
# Attach each item's community (`group`), aligned on the shared `id` index;
# items without a community get NaN and are dropped.
dat['group'] = comms['group']
has_group_mask = dat['group'].notna()
# `.copy()` makes the filtered frame independent of the original, so the
# column assignments below do not write into a slice (which raises
# SettingWithCopyWarning and may silently not take effect).
dat = dat.loc[has_group_mask, :].copy()

# With the NaN rows gone, both columns can be stored as plain integers.
dat['group'] = dat['group'].astype(int)
dat['year'] = dat['year'].astype(int)

In [None]:
# Story key of the form '<year>-<kind>-<group>'.
# NOTE(review): presumably the same format as the `story_id` index of the
# stats files, since the two are matched with `isin` later -- confirm.
dat['story_id'] = dat['year'].astype(str) + '-' + dat['kind'] + '-' + dat['group'].astype(str)

In [None]:
print(dat.shape)

# Earliest `reltime` of each story, as a two-column frame ready to be joined
# back onto the items.
story_start = (
    dat.groupby('story_id')['reltime']
    .min()
    .rename('story_reltime_min')
    .reset_index()
)
dat = dat.merge(story_start, how='inner', on='story_id')

# Within-story relative time: offset of each item from its story's first item.
dat['ws_reltime'] = dat['reltime'] - dat['story_reltime_min']

print(dat.shape)

In [None]:
# For each story-level filter, collect the ids of the stories that pass it
# and build the matching item-level boolean mask over `dat`.

# Overall filter with the `count >= 10` threshold (no decahose).
stories_overall_10 = stats_overall.loc[mask_overall_10].index
dat_mask_overall_10 = dat['story_id'].isin(stories_overall_10)

# Per-side (lib + con) filter.
stories = stats.loc[mask].index
dat_mask = dat['story_id'].isin(stories)

# Filter that keeps decahose stories, minus those flagged as 2020 covid
# decahose stories.
stories_with_dh = stats_overall.loc[dh_mask_overall_10 & ~stats_overall['decahose_covid_2020']].index
dat_mask_with_dh = dat['story_id'].isin(stories_with_dh)

# Rows of `stats` with `conservative == 0` (the liberal side).
stories_lib = stats.loc[mask & (stats['conservative'] == 0)].index
dat_mask_lib = dat['story_id'].isin(stories_lib)

# Rows of `stats` with `conservative == 1`.
stories_con = stats.loc[mask & (stats['conservative'] == 1)].index
dat_mask_con = dat['story_id'].isin(stories_con)

In [None]:
# Mean within-story relative time per source kind, over the items selected
# by `dat_mask`.
dat.loc[dat_mask].groupby('kind')['ws_reltime'].mean()

In [None]:
# Same mean, over the selection that also keeps decahose stories.
dat.loc[dat_mask_with_dh].groupby('kind')['ws_reltime'].mean()

In [None]:
# Standard deviation of within-story relative time per source kind, over the
# selection that also keeps decahose stories.
dat.loc[dat_mask_with_dh].groupby('kind')['ws_reltime'].std()

# Big 6-pane story-level figure

In [None]:
nbins = 20

# 2x3 grid of primary axes; every panel gets a twin axis with its own
# y-scale so the two overlaid histograms do not have to share one.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
twins = np.asarray([[primary.twinx() for primary in axes_row] for axes_row in axes])

# One entry per panel: grid position, source frame, row filter, and the
# column to plot. Top row plots the mean columns, bottom row the `std`
# column; the columns are overall / liberal / conservative.
panel_specs = [
    (0, 0, stats_overall, mask_overall_10, 'avg'),
    (0, 1, stats, mask & (stats['conservative'] == 0), 'avg_abs'),
    (0, 2, stats, mask & (stats['conservative'] == 1), 'avg_abs'),
    (1, 0, stats_overall, mask_overall_10, 'std'),
    (1, 1, stats, mask & (stats['conservative'] == 0), 'std'),
    (1, 2, stats, mask & (stats['conservative'] == 1), 'std'),
]

# Elite goes on the primary axes, radio on the twin axes.
series_specs = [
    ('elite', axes, elite_color, '//', 'Elite'),
    ('radio', twins, radio_color, '\\\\', 'Radio'),
]

for row, col, frame, row_filter, column in panel_specs:
    for kind, grid, color, hatch, label in series_specs:
        sns.histplot(
            frame.loc[row_filter & (frame['kind'] == kind), column],
            ax=grid[row][col], color=color, alpha=0.3, hatch=hatch,
            label=label, kde=True, bins=nbins
        )

panel_titles = (
    ('Mean Overall', 'Mean Liberal', 'Mean Conservative'),
    ('SD Overall', 'SD Liberal', 'SD Conservative'),
)
for axes_row, titles_row in zip(axes, panel_titles):
    for panel, title in zip(axes_row, titles_row):
        panel.set_title(title, fontsize=16)


def _seconds_as_hours(x, pos):
    """Tick label for a value in seconds, shown as whole hours."""
    return f'{x / 3600:.0f}h'


for ax, twin in zip(axes.flat, twins.flat):
    ax.set_xlabel('Time', fontsize=14)
    ax.set_ylabel('Count', fontsize=14)
    twin.set_ylabel(None)

    # A fresh formatter per axis: matplotlib formatters record the axis they
    # are attached to, so one instance is not reused across panels.
    fmt = mp.ticker.FuncFormatter(_seconds_as_hours)
    ax.xaxis.set_major_formatter(fmt)

    for target in (ax, twin):
        target.tick_params(axis='both', which='major', labelsize=11)

    # Single legend per panel combining the primary and twin artists.
    handles_ax, labels_ax = ax.get_legend_handles_labels()
    handles_twin, labels_twin = twin.get_legend_handles_labels()
    ax.legend(handles_ax + handles_twin, labels_ax + labels_twin, loc=0, fontsize=14)

#
# t-tests
#

def _elite_radio_ttest(frame, rows, column):
    """Independent two-sample t-test of ``column``: elite rows against radio rows."""
    elite_values = frame.loc[rows & (frame['kind'] == 'elite'), column]
    radio_values = frame.loc[rows & (frame['kind'] == 'radio'), column]
    return ss.ttest_ind(elite_values, radio_values)


lib_rows = mask & (stats['conservative'] == 0)
con_rows = mask & (stats['conservative'] == 1)

# Tests on the mean columns (top row of the figure).
ovrl_test_mean = _elite_radio_ttest(stats_overall, mask_overall_10, 'avg')
lib_test_mean = _elite_radio_ttest(stats, lib_rows, 'avg_abs')
con_test_mean = _elite_radio_ttest(stats, con_rows, 'avg_abs')

# Tests on the `std` column (bottom row of the figure).
ovrl_test_sd = _elite_radio_ttest(stats_overall, mask_overall_10, 'std')
lib_test_sd = _elite_radio_ttest(stats, lib_rows, 'std')
con_test_sd = _elite_radio_ttest(stats, con_rows, 'std')

props = {'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.5}

# (panel, test result, horizontal position of the box in axes coordinates).
# The last panel's box sits slightly further right than the others.
annotations = [
    (axes[0][0], ovrl_test_mean, 0.6),
    (axes[0][1], lib_test_mean, 0.6),
    (axes[0][2], con_test_mean, 0.6),
    (axes[1][0], ovrl_test_sd, 0.6),
    (axes[1][1], lib_test_sd, 0.6),
    (axes[1][2], con_test_sd, 0.63),
]

for panel, result, x_pos in annotations:
    textstr = '\n'.join((
        r'$H_0: \rm{\bar{E}} = \rm{\bar{R}}$',
        r'$t=%.2f$' % (result.statistic, ),
        r'$p = \rm{%.1e}$' % (result.pvalue, ),
    ))
    panel.text(x_pos, 0.65, textstr, transform=panel.transAxes, fontsize=14,
               verticalalignment='top', bbox=props)

fig.tight_layout()

# Smaller 3-pane item-level figure

In [None]:
nbins = 20

# Three panels whose primary (elite) y-axes are shared via `sharey=True`;
# each panel also gets a twin axis for the radio series.
fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True)
twins = np.asarray([axes[0].twinx(), axes[1].twinx(), axes[2].twinx()])

# `sharey=True` only links the primary axes, so link the twin axes to each
# other explicitly. `Axes.sharey` is the supported API for this; the previous
# `get_shared_y_axes().join(...)` call was deprecated in Matplotlib 3.6 and
# no longer works in current releases.
for twin in twins[1:]:
    twin.sharey(twins[0])
twins[0].autoscale()
# Right-hand tick labels are only shown on the last panel.
for twin in twins[:-1]:
    twin.yaxis.set_tick_params(labelright=False)

def _kind_reltimes(item_mask, kind):
    """Within-story relative times of the masked items of one source kind."""
    return dat.loc[item_mask & (dat['kind'] == kind), 'ws_reltime']


#
# Plot series
#

# Panels left to right: overall, liberal, conservative.
panel_masks = (dat_mask_overall_10, dat_mask_lib, dat_mask_con)

for panel_idx, item_mask in enumerate(panel_masks):
    sns.histplot(
        _kind_reltimes(item_mask, 'elite'),
        ax=axes[panel_idx], color=elite_color, alpha=0.3, hatch='//', label='Elite', kde=True, bins=nbins
    )
    sns.histplot(
        _kind_reltimes(item_mask, 'radio'),
        ax=twins[panel_idx], color=radio_color, alpha=0.3, hatch='\\\\', label='Radio', kde=True, bins=nbins
    )

#
# t-tests
#

# Independent two-sample t-tests, elite against radio, one per panel.
ovrl_test = ss.ttest_ind(
    _kind_reltimes(dat_mask_overall_10, 'elite'),
    _kind_reltimes(dat_mask_overall_10, 'radio')
)
lib_test = ss.ttest_ind(
    _kind_reltimes(dat_mask_lib, 'elite'),
    _kind_reltimes(dat_mask_lib, 'radio')
)
con_test = ss.ttest_ind(
    _kind_reltimes(dat_mask_con, 'elite'),
    _kind_reltimes(dat_mask_con, 'radio')
)

for panel, title in zip(axes, ('Overall', 'Liberal', 'Conservative')):
    panel.set_title(title, fontsize=16)


def _seconds_to_hour_label(x, pos):
    """Tick label for a value in seconds, shown as whole hours."""
    return f'{x / 3600:.0f}h'


for ax, twin in zip(axes.flat, twins.flat):
    ax.set_xlabel('Time', fontsize=14)

    ax.set_ylabel('Count (Elite)', fontsize=14)
    twin.set_ylabel('Count (Radio)', fontsize=14, rotation=270, labelpad=15)

    # A fresh formatter per axis: matplotlib formatters record the axis they
    # are attached to.
    fmt = mp.ticker.FuncFormatter(_seconds_to_hour_label)
    ax.xaxis.set_major_formatter(fmt)

    for target in (ax, twin):
        target.tick_params(axis='both', which='major', labelsize=11)

    # Single legend per panel combining the primary and twin artists.
    handles_ax, labels_ax = ax.get_legend_handles_labels()
    handles_twin, labels_twin = twin.get_legend_handles_labels()
    ax.legend(handles_ax + handles_twin, labels_ax + labels_twin, loc=0, fontsize=14)

props = {'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.5}

# Upper-tail probability of a standard normal beyond 5; the boxes report
# "p < alpha" rather than the raw p-value.
alpha = 1 - ss.norm(0, 1).cdf(5)

for panel, result in zip(axes, (ovrl_test, lib_test, con_test)):
    textstr = '\n'.join((
        r'$H_0: \rm{\bar{E}} = \rm{\bar{R}}$',
        r'$t=%.2f$' % (result.statistic, ),
        r'df = %0d' % (int(result.df), ),
        r'$p < \rm{%.1e}$' % (alpha, ),
    ))
    panel.text(0.6, 0.65, textstr, transform=panel.transAxes, fontsize=13,
               verticalalignment='top', bbox=props)

fig.tight_layout()

In [None]:
# Show that the "< alpha" statements printed in the figure are true.

# We do this rather than reporting values that underflow machine precision,
# either exactly or as 0, because extremely low p-values depend on the
# approximations made in calculating them, are often wrong, and many people
# react very negatively to seeing them reported.

alpha = 1 - ss.norm(0, 1).cdf(5)
assert ovrl_test.pvalue <= alpha
assert lib_test.pvalue <= alpha
# Previously `lib_test` was asserted twice and the conservative panel's
# claim was never checked.
assert con_test.pvalue <= alpha

In [None]:
# Full t-test results behind the three panels, in panel order.
for test_result in (ovrl_test, lib_test, con_test):
    print(test_result)

# One-pane figures

## Elite / Radio

In [None]:
nbins = 20

# Single panel: elite on the primary y-axis, radio on a twin y-axis. The
# one-element arrays keep the same `axes` / `twins` shape of code as the
# multi-panel figures.
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
axes = np.asarray([ax])
twins = np.asarray([axes[0].twinx()])

elite_times = dat.loc[dat_mask_overall_10 & (dat['kind'] == 'elite'), 'ws_reltime']
radio_times = dat.loc[dat_mask_overall_10 & (dat['kind'] == 'radio'), 'ws_reltime']

sns.histplot(
    elite_times,
    ax=axes[0], color=elite_color, alpha=0.3, hatch='//', label='Elite', kde=True, bins=nbins
)
sns.histplot(
    radio_times,
    ax=twins[0], color=radio_color, alpha=0.3, hatch='\\\\', label='Radio', kde=True, bins=nbins
)

# Independent two-sample t-test, elite against radio.
# NOTE: this rebinds the notebook-level name `ovrl_test`.
ovrl_test = ss.ttest_ind(elite_times, radio_times)


def _hour_tick_label(x, pos):
    """Tick label for a value in seconds, shown as whole hours."""
    return f'{x / 3600:.0f}h'


for ax, twin in zip(axes.flat, twins.flat):
    ax.set_xlabel('Time', fontsize=14)

    ax.set_ylabel('Count (Elite)', fontsize=14)
    twin.set_ylabel('Count (Radio)', fontsize=14, rotation=270, labelpad=15)

    fmt = mp.ticker.FuncFormatter(_hour_tick_label)
    ax.xaxis.set_major_formatter(fmt)

    for target in (ax, twin):
        target.tick_params(axis='both', which='major', labelsize=11)

    # Single legend combining the primary and twin artists.
    handles_ax, labels_ax = ax.get_legend_handles_labels()
    handles_twin, labels_twin = twin.get_legend_handles_labels()
    ax.legend(handles_ax + handles_twin, labels_ax + labels_twin, loc=0, fontsize=14)

props = {'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.5}

textstr = '\n'.join((
    r'$H_0: \rm{\bar{E}} = \rm{\bar{R}}$',
    r'$t=%.2f$' % (ovrl_test.statistic, ),
    r'$p = \rm{%.1e}$' % (ovrl_test.pvalue, ),
))
axes[0].text(0.6, 0.65, textstr, transform=axes[0].transAxes, fontsize=14,
             verticalalignment='top', bbox=props)

fig.tight_layout()

## Firehose / Radio

In [None]:
nbins = 20

# Single panel: firehose (the 'decahose' kind) on the primary y-axis, radio
# on a twin y-axis.
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
axes = np.asarray([ax])
twins = np.asarray([axes[0].twinx()])

firehose_times = dat.loc[dat_mask_with_dh & (dat['kind'] == 'decahose'), 'ws_reltime']
radio_times = dat.loc[dat_mask_with_dh & (dat['kind'] == 'radio'), 'ws_reltime']

sns.histplot(
    firehose_times,
    ax=axes[0], color=decahose2_color, alpha=0.3, hatch='//', label='Firehose', kde=True, bins=nbins
)
sns.histplot(
    radio_times,
    ax=twins[0], color=radio_color, alpha=0.3, hatch='\\\\', label='Radio', kde=True, bins=nbins
)

# Independent two-sample t-test, firehose against radio.
# NOTE: this rebinds the notebook-level name `ovrl_test`, which earlier cells
# use for the elite/radio comparison.
ovrl_test = ss.ttest_ind(firehose_times, radio_times)

for ax, twin in zip(axes.flatten(), twins.flatten()):
    ax.set_xlabel('Time', fontsize=14)
    ax.set_ylabel('Count', fontsize=14)
    twin.set_ylabel(None)

    # Tick values are in seconds; label them as whole hours.
    fmt = mp.ticker.FuncFormatter(lambda x, pos: f'{x / 3600:.0f}h')
    ax.xaxis.set_major_formatter(fmt)

    ax.tick_params(axis='both', which='major', labelsize=11)
    twin.tick_params(axis='both', which='major', labelsize=11)

    # Single legend combining the primary and twin artists.
    h1, l1 = ax.get_legend_handles_labels()
    h2, l2 = twin.get_legend_handles_labels()
    ax.legend(h1 + h2, l1 + l2, loc=0, fontsize=14)

props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

# The null hypothesis compares Firehose with Radio, so it is written with F;
# the E previously shown here was left over from the elite/radio figure.
textstr = '\n'.join((
    r'$H_0: \rm{\bar{F}} = \rm{\bar{R}}$',
    r'$t=%.2f$' % (ovrl_test.statistic, ),
    r'$p = \rm{%.1e}$' % (ovrl_test.pvalue, ),
))
axes[0].text(0.6, 0.65, textstr, transform=axes[0].transAxes, fontsize=14,
             verticalalignment='top', bbox=props)

fig.tight_layout()

# Bootstrap tests of item-level SDs

In [None]:
def bootstrap_test(g1, g2, stat=np.std, n_resamples=10, alpha=ss.norm(0, 1).cdf(5)):
    """Compare a statistic of ``g2`` with the bootstrap distribution of ``g1``'s.

    ``g1`` is resampled with replacement, at its full size, ``n_resamples``
    times and ``stat`` is evaluated on every resample. The spread of those
    values is used as the scale of a normal distribution centred on
    ``stat(g1)``, and the two-sided tail probability of ``stat(g2)`` under
    that distribution is reported as the p-value. Only ``g1`` is resampled.

    Parameters
    ----------
    g1 : object with a pandas-style ``.sample(frac=..., replace=...)`` method
        Sample that is bootstrapped.
    g2 : any input accepted by ``stat``
        Sample whose statistic is tested.
    stat : callable
        Maps a sample to a scalar. Defaults to ``np.std``.
    n_resamples : int
        Number of bootstrap resamples.
    alpha : float
        Used as a confidence level, not a significance level: the result is
        flagged significant when ``pval < 1 - alpha``.

    Returns
    -------
    dict
        Keys ``stat_g1``, ``stat_g2``, ``boot_sd``, ``pval`` and ``signif``.
    """
    resampled_stats = np.asarray([
        stat(g1.sample(frac=1, replace=True))
        for _ in range(n_resamples)
    ])
    boot_sd = resampled_stats.std()

    observed_g1 = stat(g1)
    observed_g2 = stat(g2)

    # Normal approximation to the sampling distribution of stat(g1).
    lower_tail = ss.norm(observed_g1, boot_sd).cdf(observed_g2)
    two_sided = 2 * min(lower_tail, 1 - lower_tail)

    return {
        'stat_g1': observed_g1,
        'stat_g2': observed_g2,
        'boot_sd': boot_sd,
        'pval': two_sided,
        'signif': two_sided < 1 - alpha,
    }

## Overall elite / radio

In [None]:
# Bootstrap comparison of elite (resampled) against radio within-story
# relative times, over the overall selection; 10,000 resamples.
bootstrap_test(
    dat.loc[dat_mask_overall_10 & (dat['kind'] == 'elite'), 'ws_reltime'],
    dat.loc[dat_mask_overall_10 & (dat['kind'] == 'radio'), 'ws_reltime'],
    n_resamples=10000
)

## Liberal elite / radio

In [None]:
# Same comparison restricted to the liberal selection.
bootstrap_test(
    dat.loc[dat_mask_lib & (dat['kind'] == 'elite'), 'ws_reltime'],
    dat.loc[dat_mask_lib & (dat['kind'] == 'radio'), 'ws_reltime'],
    n_resamples=10000
)

## Conservative elite / radio

In [None]:
# Same comparison restricted to the conservative selection.
bootstrap_test(
    dat.loc[dat_mask_con & (dat['kind'] == 'elite'), 'ws_reltime'],
    dat.loc[dat_mask_con & (dat['kind'] == 'radio'), 'ws_reltime'],
    n_resamples=10000
)