In [5]:
from scipy import stats
import numpy as np

### Hypothesis testing with one sample

#### mean - known population standard deviation - use normal

In [39]:
# Left- and right-tail tests for the mean (z-test).
# When the population standard deviation is somehow known, the sample mean is
# modeled as Normal(h, sigma / sqrt(n)) under the null hypothesis, so the
# p-values are read directly off a normal distribution.

h = 15  # hypothesized mean: null hypothesis mu <= h -> use right tail test,
        #                    null hypothesis mu >= h -> use left tail test

n = 10  # number of samples
mu = 15.2  # observed sample average
sigma = 0.5  # population standard deviation

std_err = sigma / np.sqrt(n)  # standard deviation of the sample mean

pval = stats.norm.cdf(mu, h, std_err)  # left tail test
pval_righttail1 = 1 - stats.norm.cdf(mu, h, std_err)  # right tail test
# Same right-tail probability via the survival function; scipy documents sf
# as sometimes more accurate than 1 - cdf.
pval_righttail2 = stats.norm.sf(mu, h, std_err)

# These right-tail values were previously stored under "lefttail" names.
# The old names stay bound so any later cell that still reads them keeps working.
pval_lefttail1 = pval_righttail1
pval_lefttail2 = pval_righttail2

print(pval)  # p-value if the null hypothesis were mu >= h (left tail)
print(pval_righttail1)  # p-value if the null hypothesis were mu <= h (right tail)
print(pval_righttail2)  # same right-tail p-value, computed with sf

0.897048394634
0.102951605366
0.102951605366


#### proportion - treat as binomial modeled as normal

In [40]:
# Two-tailed test for a proportion.
# The observed proportion is treated as a binomial outcome approximated by a
# normal distribution centered on the hypothesized proportion.

h = 0.50  # hypothesized proportion
n = 100   # number of samples
p = 0.53  # observed proportion

# Standard deviation of a sample proportion under the null hypothesis.
sigma = np.sqrt(h * (1 - h) / n)

# Explanatory path to the p-value: add the tail beyond the observed value to
# the equally sized tail beyond its mirror image on the other side of h.
mirrored = h + (h - p)
if h < p:
    upper_cut, lower_cut = p, mirrored
else:
    upper_cut, lower_cut = mirrored, p
right_tail = stats.norm.sf(upper_cut, h, sigma)
left_tail = stats.norm.cdf(lower_cut, h, sigma)
pval = right_tail + left_tail
print(pval)

# One-line path to the p-value: the normal is symmetric about h, so double
# the upper tail taken at the absolute distance from h.
pval = 2 * stats.norm.sf(h + np.abs(h - p), h, sigma)

print(pval)

0.5485062355
0.5485062355


#### mean - unknown population standard deviation - use t-test

In [41]:
# One-sample, right-tail t-test for the mean.
# The population standard deviation is unknown, so it is estimated from the
# sample and the observed mean is compared against a Student t distribution.
data = [1.11, 1.07, 1.11, 1.07, 1.12, 1.08, 0.98, 0.98, 1.02, 0.95, 0.95]
h = 1.00  # hypothesized mean; null hypothesis is mean <= 1 (right tail test)

n = len(data)
dof = n - 1  # degrees of freedom of the t distribution
mu = np.mean(data)  # observed sample average
sigma = np.std(data, ddof=1)  # sample standard deviation, hence ddof = 1
std_err = sigma / np.sqrt(n)  # estimated standard deviation of the sample mean

# Upper-tail probability of a t distribution located at h and scaled by the
# standard error, evaluated at the observed mean.
pval = stats.t.sf(mu, dof, h, std_err)

print(pval)





0.0358606486061


### Hypothesis testing with two samples