In [None]:
import itertools
import sys

sys.path.append("..")  # samplingFunctions is imported from the parent directory
from samplingFunctions import *

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats

# Hypothesis 1
$H_0: \eta=4$

$H_a: \eta>4$

### Wilcoxon Signed Rank Test
Assumes $\epsilon_i \overset{\text{iid}}{\sim} \mathcal{N}(0,1)$

In [None]:
def wilcoxonExperiment(sampleFn, binned=False, sampleSizes=(30, 100, 500),
                       trueMedians=range(1, 8), numRepetitions=1000, etaNull=4):
    """Run the one-sided Wilcoxon signed-rank test over a grid of simulated samples.

    For every combination of sample size, offset and repetition, a sample is
    drawn with ``sampleFn``, ``etaNull`` is subtracted from it, and the shifted
    sample is passed to ``scipy.stats.wilcoxon`` with ``zero_method="pratt"``
    and ``alternative="greater"``.

    Parameters
    ----------
    sampleFn : callable
        Invoked as ``sampleFn(numSamples=n, offset=eta, binned=binned)``. Its
        return value must support elementwise subtraction of ``etaNull``.
    binned : bool, optional
        Forwarded unchanged to ``sampleFn``.
    sampleSizes : iterable of int, optional
        Sample sizes ``n`` to simulate. Defaults to ``(30, 100, 500)``.
    trueMedians : iterable, optional
        Values passed to ``sampleFn`` as ``offset`` (this experiment treats
        them as the true medians). Defaults to ``range(1, 8)``.
    numRepetitions : int, optional
        Number of independent samples drawn per (n, eta) pair. Defaults to 1000.
    etaNull : number, optional
        Median under the null hypothesis. Defaults to 4.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(sampleSizes) * len(trueMedians) * numRepetitions, 3)``
        whose rows are ``[n, eta, pval]``. Rows follow ``itertools.product``
        order, so the repetition index varies fastest.

    Raises
    ------
    AssertionError
        If a drawn sample equals ``etaNull`` in every position.
    """
    # Materialise the grids so that len() works for any iterable input.
    sampleSizes = list(sampleSizes)
    trueMedians = list(trueMedians)

    totalNumTests = len(sampleSizes) * len(trueMedians) * numRepetitions
    results = np.zeros((totalNumTests, 3))
    paramsets = itertools.product(sampleSizes, trueMedians, range(numRepetitions))

    for setNum, (n, eta, _repetitionNum) in enumerate(paramsets):
        if setNum % 1000 == 0:
            print(setNum)  # coarse progress indicator
        sample = sampleFn(numSamples=n, offset=eta, binned=binned)
        testinput = sample - etaNull
        # Guard against a sample with no non-zero differences at all.
        assert not np.all(testinput == 0), "every observation equals etaNull"
        _stat, pval = stats.wilcoxon(testinput, zero_method="pratt", alternative="greater")
        results[setNum] = [n, eta, pval]
    return results

In [None]:
# Control run: Wilcoxon experiment with the sampleIndependentNormal sampler and default settings.
wResultsControl = wilcoxonExperiment(sampleIndependentNormal)

In [None]:
# Violation 1: Wilcoxon experiment with the sampleIndependentContinuousAsymmetric sampler.
wResultsViolation1 = wilcoxonExperiment(sampleIndependentContinuousAsymmetric)

In [None]:
# Violation 2: Wilcoxon experiment with the generateDependentSamplesLatentNormal sampler.
wResultsViolation2 = wilcoxonExperiment(generateDependentSamplesLatentNormal)

In [None]:
# Violation 3: sampleIndependentNormal again, but with binned=True forwarded to the sampler.
wResultsViolation3 = wilcoxonExperiment(sampleIndependentNormal, binned=True)

### Sign Test
Assumes the $\epsilon_i$ are independent draws from continuous distributions, each with median $0$.

In [None]:
def signTest(sample, etaNull):
    """Upper-tailed sign test of H0: median = etaNull against Ha: median > etaNull.

    Observations equal to ``etaNull`` (ties) are discarded before testing.

    Parameters
    ----------
    sample : array-like
        Observations; converted with ``numpy.asarray`` so lists are accepted.
    etaNull : number
        Median under the null hypothesis.

    Returns
    -------
    b : int
        Number of observations strictly greater than ``etaNull``.
    p : float
        One-sided p-value ``P(B >= b)`` for ``B ~ Binomial(nStar, 0.5)``, where
        ``nStar`` is the number of observations different from ``etaNull``.
        When every observation ties with ``etaNull`` the p-value is 1.0.
    """
    values = np.asarray(sample)
    b = np.count_nonzero(values > etaNull)
    nStar = np.count_nonzero(values != etaNull)
    if nStar == 0:
        # No informative observations: nothing speaks against the null.
        return b, 1.0
    # By symmetry of Binomial(nStar, 0.5): P(B >= b) == P(B <= nStar - b).
    p = stats.binom.cdf(nStar - b, nStar, 0.5)
    return b, p

In [None]:
def signExperiment(sampleFn, binned=False, sampleSizes=(30, 100, 500),
                   trueMedians=range(1, 8), numRepetitions=1000, etaNull=4):
    """Run the sign test over a grid of simulated samples.

    For every combination of sample size, offset and repetition, a sample is
    drawn with ``sampleFn`` and passed to ``signTest`` together with ``etaNull``.

    Parameters
    ----------
    sampleFn : callable
        Invoked as ``sampleFn(numSamples=n, offset=eta, binned=binned)``.
    binned : bool, optional
        Forwarded unchanged to ``sampleFn``.
    sampleSizes : iterable of int, optional
        Sample sizes ``n`` to simulate. Defaults to ``(30, 100, 500)``.
    trueMedians : iterable, optional
        Values passed to ``sampleFn`` as ``offset`` (this experiment treats
        them as the true medians). Defaults to ``range(1, 8)``.
    numRepetitions : int, optional
        Number of independent samples drawn per (n, eta) pair. Defaults to 1000.
    etaNull : number, optional
        Median under the null hypothesis. Defaults to 4.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(sampleSizes) * len(trueMedians) * numRepetitions, 3)``
        whose rows are ``[n, eta, pval]``. Rows follow ``itertools.product``
        order, so the repetition index varies fastest.
    """
    # Materialise the grids so that len() works for any iterable input.
    sampleSizes = list(sampleSizes)
    trueMedians = list(trueMedians)

    totalNumTests = len(sampleSizes) * len(trueMedians) * numRepetitions
    results = np.zeros((totalNumTests, 3))
    paramsets = itertools.product(sampleSizes, trueMedians, range(numRepetitions))

    for setNum, (n, eta, _repetitionNum) in enumerate(paramsets):
        if setNum % 1000 == 0:
            print(setNum)  # coarse progress indicator
        sample = sampleFn(numSamples=n, offset=eta, binned=binned)
        _stat, pval = signTest(sample, etaNull)
        results[setNum] = [n, eta, pval]
    return results

In [None]:
# Scratch check: P(X <= 8) for X ~ Binomial(10, 0.5); the value signTest's formula yields for b=2, nStar=10.
stats.binom.cdf(8,10,.5)

In [None]:
# Scratch check: P(X <= 1) for X ~ Binomial(10, 0.5); the value signTest's formula yields for b=9, nStar=10.
stats.binom.cdf(1,10,.5)

Evaluate performance of the tests when the model assumptions hold

In [None]:
# NOTE(review): wilcoxonComparison is not defined in the visible cells (only wilcoxonExperiment is);
# presumably it comes from samplingFunctions or a removed cell — confirm this runs on a fresh kernel.
wilcoxonComparison(sampleIndependentNormal)

In [None]:
# NOTE(review): signTestComparison is not defined in the visible cells (only signExperiment is);
# presumably it comes from samplingFunctions or a removed cell — confirm this runs on a fresh kernel.
signTestComparison(sampleIndependentNormal)

Evaluate performance when violating normality

In [None]:
# NOTE(review): wilcoxonComparison is not defined in the visible cells, and the sampler name is
# spelled "Indpendent" unlike the other samplers — confirm both names exist on a fresh kernel.
wilcoxonComparison(sampleIndpendentContinuousSymmetric)

In [None]:
# NOTE(review): signTestComparison is not defined in the visible cells, and the sampler name is
# spelled "Indpendent" unlike the other samplers — confirm both names exist on a fresh kernel.
signTestComparison(sampleIndpendentContinuousSymmetric)

Evaluate performance when violating symmetry

In [None]:
# NOTE(review): wilcoxonComparison is not defined in the visible cells (only wilcoxonExperiment is);
# presumably it comes from samplingFunctions or a removed cell — confirm this runs on a fresh kernel.
wilcoxonComparison(sampleIndependentContinuousAsymmetric)

In [None]:
# NOTE(review): signTestComparison is not defined in the visible cells (only signExperiment is);
# presumably it comes from samplingFunctions or a removed cell — confirm this runs on a fresh kernel.
signTestComparison(sampleIndependentContinuousAsymmetric)

Evaluate performance when independence is violated

How do we visualize these tests? There is a lot of variability in the samples drawn from dependent data, so the setup above isn't as applicable.