# Bootstrap test of birthweight difference

We run bootstrap resampling from pooled drug and control birthweights,
to establish the null-world distribution of the mean difference in
weights. Then we compare the observed difference in means to the
null-world distribution to ask if the actual value is plausible in the
null world.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Random number generator used for the resampling below.  No seed is given,
# so the simulated values differ from run to run.
rnd = np.random.default_rng()

# Read the birthweight data set.
birth_df = pd.read_csv('data/birthweights.csv')

# Rows for participants who received Drug A, and their birthweights as an
# array.
treat_df = birth_df.loc[birth_df['Treatment'] == 'Drug A']
treat = treat_df['Birthweight'].to_numpy(copy=True)

# Rows for the control participants, and their birthweights as an array.
control_df = birth_df.loc[birth_df['Treatment'] == 'Control']
control = control_df['Birthweight'].to_numpy(copy=True)

# Observed difference: mean treatment birthweight minus mean control
# birthweight.
actual_diff = treat.mean() - control.mean()
# Show the observed difference.
actual_diff

Proceed with the simulation:

In [None]:
# Pool the treatment and control birthweights.  In the null world the
# group labels carry no information, so both groups are draws from this
# single pooled collection.
both = np.concatenate([treat, control])

# Size of each fake group: match the real group sizes rather than
# hard-coding them, so the simulation stays correct if the data change.
n_treat = len(treat)
n_control = len(control)

# Number of trials.
n_trials = 10_000

# Make array to store results for each trial.
results = np.zeros(n_trials)

# Run n_trials simulations.
for i in range(n_trials):
    # Take a resample (with replacement, the default for rnd.choice) from
    # all birthweights, the same size as the real treatment group.
    fake_treat = rnd.choice(both, size=n_treat)
    # Take a second, similar resample the size of the real control group.
    fake_control = rnd.choice(both, size=n_control)
    # Find the means of the two resamples.
    mt = np.mean(fake_treat)
    mc = np.mean(fake_control)
    # Find the difference between the means of the two resamples.
    diff = mt - mc
    # Keep score of the result.
    results[i] = diff
    # End the simulation experiment, go back and repeat.

# Produce a histogram of the resample differences.
plt.hist(results, bins=25)
plt.title('Null-world distribution of treatment/control difference')
plt.xlabel('Null-world mean treatment - mean control')

# How often did the resample differences equal or exceed the observed
# difference (actual_diff)?  Count with NumPy instead of the builtin sum,
# which would loop over the array one element at a time.
k = np.count_nonzero(results >= actual_diff)
kk = k / n_trials
print('Proportion null-world differences >= actual difference:', kk)

Proceed with the simulation: