FISHING: DAVE'S TELESCOPING STUDY

_Companion to ["Fishing"](https://github.com/sparshsah/foggy-demo/blob/main/demo/stats/fishing.pdf)_

[@sparshsah](https://github.com/sparshsah)

In [6]:
from typing import Final

import numpy as np

In [72]:
# DAVE'S STUDY PARAMETERS
# critical statistic (two-sided)
CRITICAL_Z_SCORE: Final[float] = 1.96
# share of studies we expect to be flagged significant at that critical value
# when the null is true; based on convergence of the sampling distribution of
# the sample mean (which is technically Student-T distributed) to the Normal
# distribution
EXPECTED_P_VALUE: Final[float] = 0.05
# max number of subjects until Dave's death forces him to give up
MAX_S: Final[int] = 1_000
# # global running subject counter (number of participants in the current study)
# S: int = 0
# pre-allocated observation buffer, one slot per subject; NaN marks "not yet observed".
# (Annotated as plain `np.ndarray`: the first generic parameter of `ndarray` is the
# shape, not the dtype, so `np.ndarray[float]` was not a meaningful annotation.)
Y: np.ndarray = np.full(shape=MAX_S, fill_value=np.nan)

# OUR SIMULATION PARAMETERS -- We're going to assume that the null hypothesis of no effect (H0: mu = 0) is true
# number of trials we're gonna average over to calculate significance level (i.e. p-value)
T: Final[int] = 100_000

In [69]:
def _reset_study() -> None:
    """Wipe the shared observation buffer `Y` back to all-NaN, in place."""
    # In-place fill: the module-level array object is reused, never rebound.
    Y.fill(np.nan)


def _study_one_subject(s: int = 1) -> None:
    """Observe subject number `s` (1-indexed) and record the outcome in `Y`.

    The outcome is a single draw from the module-level generator `rng`.
    """
    slot = s - 1  # subjects are numbered from 1, buffer slots from 0
    observation = rng.normal()
    Y[slot] = observation

def _decide_is_significant(s: int = 0) -> bool:
    """Observe all `s` subjects in the study, then decide whether
    the study yielded statistically-significant evidence against the null.

    The z-score is the sample mean of `Y[:s]` divided by its standard error
    (sample standard deviation with `ddof=1`, over `sqrt(s)`).

    Returns `True` when |z-score| >= CRITICAL_Z_SCORE, else `False`.
    With fewer than two subjects the sample standard deviation is undefined,
    so no evidence can be claimed and the result is `False`.
    """
    if s < 2:
        # `np.std(..., ddof=1)` divides by (s - 1): NaN for s == 1, and there is
        # nothing to average at all for s == 0. Not significant by construction.
        return False
    observed = Y[:s]
    sample_mean = np.mean(observed)
    sample_std = np.std(observed, ddof=1)
    stderr = sample_std / np.sqrt(s)
    z_score = sample_mean / stderr
    # Sanity check: a NaN here means some of the first `s` slots were never observed.
    assert not np.isnan(z_score), f"Something went wrong! `s` = {s}."
    # Convert from `np.bool_` so the annotated return type is honored.
    return bool(abs(z_score) >= CRITICAL_Z_SCORE)


def study_all_subjects_fixed(max_s: int = 1_000) -> bool:
    """Conduct a study of exactly `max_s` subjects,
    then at the end decide whether |z-score| >= CRITICAL_Z_SCORE
    for the final study (i.e. based on all subjects).

    Subjects 1..`max_s` are observed in order (overwriting the corresponding
    slots of the shared buffer), and significance is evaluated exactly once.

    Raises:
        ValueError: if `max_s` is less than 1 (there would be no study to judge).
    """
    if max_s < 1:
        raise ValueError(f"`max_s` must be at least 1, got {max_s}.")
    for s in range(1, max_s + 1):
        _study_one_subject(s=s)
    # Use `max_s` directly rather than the leaked loop variable, which is
    # unbound whenever the loop body never runs.
    return _decide_is_significant(s=max_s)

def study_all_subjects_telescoping(max_s: int = MAX_S) -> bool:
    """Conduct a telescoping series of studies of up to `max_s` subjects,
    and after each round decide whether |z-score| >= CRITICAL_Z_SCORE
    for the current iteration (i.e. based on subjects observed so far).

    Returns `True` as soon as any interim check is significant (the study
    stops there), or `False` if all `max_s` subjects are observed without
    ever reaching significance.
    """
    # A z-score needs a sample standard deviation (ddof=1), which is undefined
    # for a single observation -- so the first interim check happens at s == 2.
    min_subjects_to_test = 2
    for s in range(1, max_s + 1):
        _study_one_subject(s=s)
        if s >= min_subjects_to_test and _decide_is_significant(s=s):
            # Eureka! Stop the study.
            return True
    # :sadface: We ran out of funding and no success.
    # Dave's gravestone will read "Here Lies Dave -- His Life's Work Was Meaningless and Vain".
    return False

# The Fixed Case

Good --
in our simulations,
based on our critical $z$-score of 1.96,
we see a $p$-value (i.e. the share of studies that reject the null even though it is true) of 5\%, as expected.

In [74]:
# Seed the shared generator so the simulated studies are reproducible.
rng = np.random.default_rng(seed=42)
# One fixed-size study per trial; each entry is that study's significance verdict.
sims = [study_all_subjects_fixed() for _ in range(T)]
# Bare expression so the notebook displays the formatted summary as the cell output.
f"In our simulations, we see a p-value of {np.mean(sims):.2%}, against an expectation of {EXPECTED_P_VALUE:.2%}."

'In our simulations, we see a p-value of 5.06%, against an expectation of 5.00%.'