In [None]:
import sys
sys.path.append("..")  # make the sibling samplingFunctions module importable
# Star import stays first so the explicit imports below keep precedence on any name clash.
from samplingFunctions import *
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats

# Hypothesis 1
$H_0: \eta=4$

$H_a: \eta>4$

## Wilcoxon Signed Rank Test
Assumes $\epsilon_i \overset{\text{iid}}{\sim} \mathcal{N}(0,1)$

In [None]:
def wilcoxonExperiment(sampleFn, discrete=False, errorMean=0,
                       sampleSizes=(30, 100, 500), numRepetitions=1000,
                       etaNull=4, verbose=True):
    """Simulate one-sided Wilcoxon signed-rank tests of H0: eta = etaNull vs Ha: eta > etaNull.

    For every combination of sample size, median and repetition a sample is
    drawn with ``sampleFn`` and ``scipy.stats.wilcoxon`` is applied to
    ``sample - etaNull`` (``zero_method="pratt"``, ``alternative="greater"``).

    Parameters
    ----------
    sampleFn : callable
        Called as ``sampleFn(numSamples=n, offset=eta, discrete=discrete)``;
        must return an array-like of observations.
    discrete : bool, default False
        Forwarded unchanged to ``sampleFn``.
    errorMean : number, default 0
        Added to each median in ``1..7`` before sampling.
    sampleSizes : iterable of int, default (30, 100, 500)
        Sample sizes to simulate.
    numRepetitions : int, default 1000
        Number of independent samples per (size, median) pair.
    etaNull : number, default 4
        Median under the null hypothesis.
    verbose : bool, default True
        When true, print the running test index every 1000 tests.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(sampleSizes) * 7 * numRepetitions, 3)`` whose
        columns are sample size, median and p-value.

    Notes
    -----
    The median stored in column 1 already includes ``errorMean``.
    NOTE(review): ``signExperiment`` stores the *unshifted* median instead;
    confirm which convention downstream analysis expects.
    """
    sampleSizes = list(sampleSizes)
    trueMedians = np.arange(1, 8) + errorMean
    totalNumTests = len(sampleSizes) * len(trueMedians) * numRepetitions
    results = np.zeros((totalNumTests, 3))
    paramSets = itertools.product(sampleSizes, trueMedians, range(numRepetitions))
    for setNum, (n, eta, _) in enumerate(paramSets):
        if verbose and setNum % 1000 == 0:
            print(setNum)
        # asarray lets samplers that return plain sequences work as well.
        sample = np.asarray(sampleFn(numSamples=n, offset=eta, discrete=discrete))
        testInput = sample - etaNull
        # The signed-rank statistic is undefined when every difference is zero.
        assert np.any(testInput != 0), "all observations equal etaNull; Wilcoxon test is undefined"
        _, pval = stats.wilcoxon(testInput, zero_method="pratt", alternative="greater")
        results[setNum] = [n, eta, pval]
    return results

In [None]:
# Control run: default settings (discrete=False, errorMean=0).
wResultsControl = wilcoxonExperiment(sampleFn=sampleIndependentNormal)

In [None]:
# Violation 1: same sampler as the control, but with the error mean shifted to 1.
wResultsViolation1 = wilcoxonExperiment(sampleFn=sampleIndependentNormal, errorMean=1)

In [None]:
# Violation 2: swap in the asymmetric sampler (presumably breaks symmetry — confirm in samplingFunctions).
wResultsViolation2 = wilcoxonExperiment(sampleFn=sampleIndependentContinuousAsymmetric)

In [None]:
# Violation 3: swap in the dependent-sample generator (presumably breaks independence — confirm in samplingFunctions).
wResultsViolation3 = wilcoxonExperiment(sampleFn=generateDependentSamplesLatentNormal)

In [None]:
# Violation 4: same sampler as the control, but asked for discrete output.
wResultsViolation4 = wilcoxonExperiment(sampleFn=sampleIndependentNormal, discrete=True)

## Sign Test
Assumes the errors $\epsilon_i$ are independent draws from a continuous distribution with median $0$.

In [None]:
def signTest(sample, etaNull):
    """Upper-tailed sign test of H0: median = etaNull against Ha: median > etaNull.

    Parameters
    ----------
    sample : array-like
        Observations; anything ``numpy.asarray`` accepts.
    etaNull : number
        Median under the null hypothesis.

    Returns
    -------
    b : int
        Number of observations strictly greater than ``etaNull``.
    p : float
        ``P(B >= b)`` for ``B ~ Binomial(nStar, 0.5)``, where ``nStar`` is the
        number of observations different from ``etaNull``. When every
        observation equals ``etaNull`` there is no evidence against H0 and
        ``1.0`` is returned.
    """
    sample = np.asarray(sample)
    b = int(np.count_nonzero(sample > etaNull))
    # Observations tied with the null median carry no sign and are dropped.
    nStar = int(np.count_nonzero(sample != etaNull))
    if nStar == 0:
        return b, 1.0
    # Binomial(nStar, 1/2) is symmetric, so P(B >= b) == P(B <= nStar - b).
    p = stats.binom.cdf(nStar - b, nStar, 0.5)
    return b, p

In [None]:
def signExperiment(sampleFn, discrete=False, errorMedian=0,
                   sampleSizes=(30, 100, 500), numRepetitions=1000,
                   etaNull=4, verbose=True):
    """Simulate one-sided sign tests of H0: eta = etaNull vs Ha: eta > etaNull.

    For every combination of sample size, median in ``1..7`` and repetition a
    sample is drawn with ``sampleFn`` and passed to ``signTest``.

    Parameters
    ----------
    sampleFn : callable
        Called as ``sampleFn(numSamples=n, offset=eta + errorMedian,
        discrete=discrete)``; its return value is handed to ``signTest``.
    discrete : bool, default False
        Forwarded unchanged to ``sampleFn``.
    errorMedian : number, default 0
        Added to the median when sampling (but not to the recorded value).
    sampleSizes : iterable of int, default (30, 100, 500)
        Sample sizes to simulate.
    numRepetitions : int, default 1000
        Number of independent samples per (size, median) pair.
    etaNull : number, default 4
        Median under the null hypothesis.
    verbose : bool, default True
        When true, print the running test index every 1000 tests.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(sampleSizes) * 7 * numRepetitions, 3)`` whose
        columns are sample size, median and p-value.

    Notes
    -----
    The median stored in column 1 does NOT include ``errorMedian``.
    NOTE(review): ``wilcoxonExperiment`` stores the *shifted* median instead;
    confirm which convention downstream analysis expects.
    """
    sampleSizes = list(sampleSizes)
    trueMedians = range(1, 8)
    totalNumTests = len(sampleSizes) * len(trueMedians) * numRepetitions
    results = np.zeros((totalNumTests, 3))
    paramSets = itertools.product(sampleSizes, trueMedians, range(numRepetitions))
    for setNum, (n, eta, _) in enumerate(paramSets):
        if verbose and setNum % 1000 == 0:
            print(setNum)
        sample = sampleFn(numSamples=n, offset=eta + errorMedian, discrete=discrete)
        # Only the p-value is kept; the count of positive signs is discarded.
        _, pval = signTest(sample, etaNull)
        results[setNum] = [n, eta, pval]
    return results

In [None]:
# Control run: default settings (discrete=False, errorMedian=0).
sResultControl = signExperiment(sampleFn=sampleIndependentNormal)

In [None]:
# Violation 1: same sampler as the control, but with the error median shifted to 1.
sResultV1 = signExperiment(sampleFn=sampleIndependentNormal, errorMedian=1)

In [None]:
# Violation 2: same sampler as the control, but asked for discrete output.
sResultsV2 = signExperiment(sampleFn=sampleIndependentNormal, discrete=True)

In [None]:
# Violation 3: swap in the dependent-sample generator (presumably breaks independence — confirm in samplingFunctions).
sResultsV3 = signExperiment(sampleFn=generateDependentSamplesLatentNormal)

In [None]:
# Wrap the Wilcoxon control results in a labelled frame and tag them with the test name.
dfwResultsControl = pd.DataFrame(
    wResultsControl, columns=["size", "median", "p-Val"]
).assign(treatment="Wilcoxon Signed Rank Test")

In [None]:
# Preview the first five rows.
dfwResultsControl.head(n=5)

In [None]:
# Persist the Wilcoxon control results (row index is written as the first column).
dfwResultsControl.to_csv(path_or_buf="../results/wResultsControl.csv")

In [None]:
# Wrap the Wilcoxon violation-1 results in a labelled frame and tag them with the test name.
dfwResultsViolation1 = pd.DataFrame(
    wResultsViolation1, columns=["size", "median", "p-Val"]
).assign(treatment="Wilcoxon Signed Rank Test")

In [None]:
# Preview the first five rows.
dfwResultsViolation1.head(n=5)

In [None]:
# Persist the Wilcoxon violation-1 results (row index is written as the first column).
dfwResultsViolation1.to_csv(path_or_buf="../results/wResultsViolation1.csv")

In [None]:
# Wrap the Wilcoxon violation-2 results in a labelled frame and tag them with the test name.
dfwResultsViolation2 = pd.DataFrame(
    wResultsViolation2, columns=["size", "median", "p-Val"]
).assign(treatment="Wilcoxon Signed Rank Test")

In [None]:
# Preview the first five rows.
dfwResultsViolation2.head(n=5)

In [None]:
# Persist the Wilcoxon violation-2 results (row index is written as the first column).
dfwResultsViolation2.to_csv(path_or_buf="../results/wResultsViolation2.csv")

In [None]:
# Wrap the Wilcoxon violation-3 results in a labelled frame and tag them with the test name.
dfwResultsViolation3 = pd.DataFrame(
    wResultsViolation3, columns=["size", "median", "p-Val"]
).assign(treatment="Wilcoxon Signed Rank Test")

In [None]:
# Preview the first five rows.
dfwResultsViolation3.head(n=5)

In [None]:
# Persist the Wilcoxon violation-3 results (row index is written as the first column).
dfwResultsViolation3.to_csv(path_or_buf="../results/wResultsViolation3.csv")

In [None]:
# Wrap the Wilcoxon violation-4 results in a labelled frame and tag them with the test name.
dfwResultsViolation4 = pd.DataFrame(
    wResultsViolation4, columns=["size", "median", "p-Val"]
).assign(treatment="Wilcoxon Signed Rank Test")

In [None]:
# Persist the Wilcoxon violation-4 results (row index is written as the first column).
dfwResultsViolation4.to_csv(path_or_buf="../results/wResultsViolation4.csv")

In [None]:
# Wrap the sign-test control results in a labelled frame and tag them with the test name.
dfsResultControl = pd.DataFrame(
    sResultControl, columns=["size", "median", "p-Val"]
).assign(treatment="Sign Test")

In [None]:
# Preview the first five rows.
dfsResultControl.head(n=5)

In [None]:
# Persist the sign-test control results (row index is written as the first column).
dfsResultControl.to_csv(path_or_buf="../results/sResultControl.csv")

In [None]:
# Wrap the sign-test violation-1 results in a labelled frame and tag them with the test name.
dfsResultV1 = pd.DataFrame(
    sResultV1, columns=["size", "median", "p-Val"]
).assign(treatment="Sign Test")

In [None]:
# Persist the sign-test violation-1 results (row index is written as the first column).
dfsResultV1.to_csv(path_or_buf="../results/sResultsViolation1.csv")

In [None]:
# Wrap the sign-test violation-2 results in a labelled frame and tag them with the test name.
dfsResultV2 = pd.DataFrame(
    sResultsV2, columns=["size", "median", "p-Val"]
).assign(treatment="Sign Test")

In [None]:
# Persist the sign-test violation-2 results (row index is written as the first column).
dfsResultV2.to_csv(path_or_buf="../results/sResultsViolation2.csv")

In [None]:
# Wrap the sign-test violation-3 results in a labelled frame and tag them with the test name.
dfsResultV3 = pd.DataFrame(
    sResultsV3, columns=["size", "median", "p-Val"]
).assign(treatment="Sign Test")

In [None]:
# Persist the sign-test violation-3 results (row index is written as the first column).
dfsResultV3.to_csv(path_or_buf="../results/sResultsViolation3.csv")