In [1]:
from scipy import stats
import numpy as np

### Hypothesis testing with one sample

#### mean - known population standard deviation - use normal

In [2]:
# Left- and right-tailed tests for the mean.
# When the population standard deviation is known, the sample mean is modeled
# as Normal(h, sigma / sqrt(n)) under the null, so p-values come from a normal distribution.

h = 15  # null hypothesis value: H0 mu <= h -> use the right-tailed test,
        #                        H0 mu >= h -> use the left-tailed test

n = 10  # number of samples
x_bar = 15.2  # Observed average
sigma = 0.5  # Population standard deviation

std_err = sigma / np.sqrt(n)  # standard error of the mean

pval = stats.norm.cdf(x_bar, h, std_err)  # Left tail: P(mean <= x_bar | mu = h)
pval_righttail1 = 1 - stats.norm.cdf(x_bar, h, std_err)  # Right tail via the complement
pval_righttail2 = stats.norm.sf(x_bar, h, std_err)  # Right tail via the survival function;
                                                     # more accurate than 1 - cdf for tiny p-values

print(pval)  # applies if the null hypothesis were mu >= h
print(pval_righttail1)  # applies if the null hypothesis were mu <= h
print(pval_righttail2)

0.897048394633965
0.10295160536603498
0.10295160536603498


#### proportion - treat as binomial modeled as normal

In [3]:
# Two-tailed test for a single proportion

h = 0.50  # hypothesized proportion

n = 100   # number of samples
p = 0.53  # observed proportion


# Standard deviation of a sample proportion under the null
# (binomial modeled as normal)
sigma = np.sqrt(h * (1 - h) / n)

# Explanatory path to the p-value: add the tail beyond the observation to the
# tail beyond its mirror image on the opposite side of h.
mirrored = h + (h - p)
upper, lower = (p, mirrored) if h < p else (mirrored, p)
right_tail = stats.norm.sf(upper, h, sigma)
left_tail = stats.norm.cdf(lower, h, sigma)
pval = right_tail + left_tail
print(pval)

# One-line path to the p-value: the normal is symmetric about h, so double
# the upper tail taken at distance |h - p| from h.
distance = np.abs(h - p)
pval = 2 * stats.norm.sf(h + distance, h, sigma)

print(pval)

0.5485062355001469
0.5485062355001469


#### unknown population standard deviation - use t-test

In [4]:
data = [1.11, 1.07, 1.11, 1.07, 1.12, 1.08, 0.98, 0.98, 1.02, 0.95, 0.95]
h = 1.00  # null hypothesis: mean <= 1

n = len(data)
x_bar = np.mean(data)
s = np.std(data, ddof=1)  # ddof=1 gives the sample standard deviation
print(x_bar)

# Right-tail p-value from a t distribution with n - 1 degrees of freedom,
# located at h and scaled by the standard error of the mean.
std_err = s / np.sqrt(n)
pval = stats.t.sf(x_bar, n - 1, h, std_err)

print(pval)

# Or just use the scipy method.
#  Note: the result holds the test statistic and the 2-sided p-value.
result = stats.ttest_1samp(data, h)
print(result)

two_sided = result[1]

# The one-tailed p-value of interest is half the two-tailed one
# * i.e. a null of mean <= 1 will never be rejected by a sample mean <= 1
print(two_sided / 2)

# If you really want the one tail for the side that will never reject the null
print(1 - two_sided / 2)


1.0399999999999998
0.03586064860607561
Ttest_1sampResult(statistic=2.0137774303955394, pvalue=0.07172129721215122)
0.03586064860607561
0.9641393513939244


### Hypothesis testing with two independent samples

In [5]:
# Two populations, described by summary statistics only

n_x = 9      # number of x
x_bar = 2    # average for x
s_x = 0.866  # sample standard deviation for x

n_y = 16     # number of y
y_bar = 3.2  # average for y
s_y = 1      # sample standard deviation for y

# Each sample's contribution to the variance of the difference in means
var_x = s_x ** 2 / n_x
var_y = s_y ** 2 / n_y

# DOF - Welch test
dof = ((var_x + var_y) ** 2 /
       (1 / (n_x - 1) * var_x ** 2 +
        1 / (n_y - 1) * var_y ** 2))
print(dof)

# Standard error of the difference in means
std_err = np.sqrt(var_x + var_y)

# Two-tailed p-value: upper tail beyond +|difference| plus lower tail below -|difference|
gap = np.abs(x_bar - y_bar)
upper_tail = stats.t.sf(gap, dof, 0, std_err)
lower_tail = stats.t.cdf(-1 * gap, dof, 0, std_err)
pval = upper_tail + lower_tail

print(pval)

# If the raw data is available, scipy.stats.ttest_ind(a, b, equal_var=False) can be used.
# Note: equal_var=True performs Student's independent two-sample test
#       (assumes equal population variances);
#       equal_var=False performs Welch's t-test.

18.84659125336577
0.005401921297382211


#### right tailed dist + Cohen's d

In [6]:
# H0: mean of X is less than (or equal to) mean of Y -> right-tailed test

n_x = 11
x_bar = 4
s_x = 1.5

n_y = 9
y_bar = 3.5
s_y = 1

# Each sample's contribution to the variance of the difference in means
var_x = s_x ** 2 / n_x
var_y = s_y ** 2 / n_y

# DOF - Welch test
dof = ((var_x + var_y) ** 2 /
       (1 / (n_x - 1) * var_x ** 2 +
        1 / (n_y - 1) * var_y ** 2))

std_err = np.sqrt(var_x + var_y)

# Upper-tail probability of the observed difference under a t distribution
# centered at 0 and scaled by the standard error
mean_diff = x_bar - y_bar
pval = stats.t.sf(mean_diff, dof, 0, std_err)

print(pval)

# Cohen's d: difference in means expressed in units of the pooled standard deviation

ss_x = (n_x - 1) * s_x ** 2
ss_y = (n_y - 1) * s_y ** 2
s_pooled = np.sqrt((ss_x + ss_y) / (n_x + n_y - 2))

cohen_d = (x_bar - y_bar) / s_pooled

print(cohen_d)


0.1928185434187067
0.3841106397986879


#### Two Independent Population Proportions

In [7]:
x_a = 20   # observed count in sample a
n_a = 200  # size of sample a
x_b = 12   # observed count in sample b
n_b = 200  # size of sample b

p_a = x_a / n_a
p_b = x_b / n_b
p_c = (x_a + x_b) / (n_a + n_b)  # pooled proportion

# Standard error of the difference in proportions, using the pooled proportion
sigma = np.sqrt(p_c * (1 - p_c) * (1 / n_a + 1 / n_b))

# Two-tailed p-value for the difference in proportions under Normal(0, sigma).
# The absolute difference is used instead of swapping p_a and p_b, so both
# names keep referring to their own samples after this cell runs.
diff = np.abs(p_a - p_b)
p_val = stats.norm.sf(diff, 0, sigma) + stats.norm.cdf(-diff, 0, sigma)

print(p_val)

# There is no one-line scipy.stats equivalent used here.
# NOTE(review): statsmodels' proportions_ztest looks like the one-call version - confirm before relying on it.


0.1403686607716731


### Hypothesis testing with two dependent samples

In [8]:
data_before = [6.6, 6.5, 9.0, 10.3, 11.3, 8.1, 6.3, 11.6]
data_after =  [6.8, 2.4, 7.4,  8.5,  8.1, 6.1, 3.4,  2.0]

# Paired differences (after minus before), one per subject
before_array, after_array = np.array(data_before), np.array(data_after)
diff_array = after_array - before_array
n = len(data_before)

print(diff_array)

x_bar_diff = np.mean(diff_array)
s_diff = np.std(diff_array, ddof=1)  # sample standard deviation of the differences
print(x_bar_diff, s_diff)

# Left-tail p-value: t distribution with n - 1 dof, centered at 0,
# scaled by the standard error of the mean difference
std_err = s_diff / np.sqrt(n)
p_val = stats.t.cdf(x_bar_diff, n - 1, 0, std_err)

print(p_val)

# One-line scipy stats method
#   halve the two-sided p-value for a one-tailed test
stats.ttest_rel(data_after, data_before)[1] / 2


[ 0.2 -4.1 -1.6 -1.8 -3.2 -2.  -2.9 -9.6]
-3.1250000000000004 2.911430674329817
0.009477987786306367


0.009477987786306376

In [9]:
# Paired samples, right-tailed test

data_before = [205, 241, 338, 368]
data_after  = [295, 252, 330, 360]

# Paired differences (after minus before)
before_array, after_array = np.array(data_before), np.array(data_after)
diff_array = after_array - before_array
n = len(data_before)

print(diff_array)

x_bar_diff = np.mean(diff_array)
s_diff = np.std(diff_array, ddof=1)  # sample standard deviation of the differences

# Right-tail p-value: t distribution with n - 1 dof, centered at 0,
# scaled by the standard error of the mean difference
std_err = s_diff / np.sqrt(n)
p_val = stats.t.sf(x_bar_diff, n - 1, 0, std_err)

print(p_val)

# Halve the two-sided ttest_rel p-value for a one-tailed test. The argument
# order does not change the two-sided p-value, so both orderings are printed.
for first, second in ((data_after, data_before), (data_before, data_after)):
    print(stats.ttest_rel(first, second)[1] / 2)



[90 11 -8 -8]
0.2149441957535246
0.2149441957535246
0.2149441957535246


### Chi-squared test

#### Goodness-of-fit

In [10]:
exp_dist = [12, 12, 12, 12, 12]  # Expected distribution
obs_dist = [15, 12,  9,  9, 15]  # Observed distribution

exp = np.array(exp_dist)
obs = np.array(obs_dist)

dof = len(exp_dist) - 1  # number of categories minus one

# Chi-squared statistic: squared residuals scaled by the expected counts
residuals = obs - exp
chi_stat = np.sum(residuals ** 2 / exp)

# Right-tail probability of the statistic
p_val = stats.chi2.sf(chi_stat, dof)

print(p_val)


# One-line scipy stats method
#   dof defaults to k - 1; the optional ddof argument sets dof to k - 1 - ddof
print(stats.chisquare(obs_dist, exp_dist))



0.5578254003710748
Power_divergenceResult(statistic=3.0, pvalue=0.5578254003710748)


#### Independence

In [29]:
# Observed contingency table
data = np.array([[111, 96, 48],
                 [96, 133, 61],
                 [91, 150, 53]])

n_terms = data.sum()
n_rows, n_cols = data.shape
dof = (n_rows - 1) * (n_cols - 1)

# Expected count for cell (i, j) is row_total[i] * col_total[j] / grand_total;
# the outer product of the marginal totals builds the whole table at once.
row_totals = data.sum(axis=1)
col_totals = data.sum(axis=0)
expected = np.outer(row_totals, col_totals) / n_terms

# Chi-squared statistic: squared residuals scaled by the expected counts
chi_stat = (((data - expected) ** 2) / expected).sum()

# Right-tail probability of the statistic
p_val = stats.chi2.sf(chi_stat, dof)

print(expected)
print(chi_stat)
print(p_val)
print()

# scipy stats method
print(stats.chi2_contingency(data))

[[ 90.57210965 115.19070322  49.23718713]
 [103.00357569 131.0011919   55.99523242]
 [104.42431466 132.80810489  56.76758045]]
12.990918513170868
0.011320253054188366

(12.990918513170868, 0.011320253054188366, 4, array([[ 90.57210965, 115.19070322,  49.23718713],
       [103.00357569, 131.0011919 ,  55.99523242],
       [104.42431466, 132.80810489,  56.76758045]]))


#### Homogeneity

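This uses the same calculation as the test for independence, applied here to test whether two populations have the same distribution. A small p-value is evidence against the null hypothesis that the distributions are the same.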

In [31]:
# Observed counts: one row per population, one column per category
data = np.array([[72, 84, 49, 45],
                 [91, 86, 88, 35]])

n_terms = data.sum()
n_rows, n_cols = data.shape  # two populations here, but any number of rows works
dof = (n_rows - 1) * (n_cols - 1)  # reduces to n_cols - 1 for two populations

# Expected count for cell (i, j) is row_total[i] * col_total[j] / grand_total;
# the outer product of the marginal totals builds the whole table at once.
row_totals = data.sum(axis=1)
col_totals = data.sum(axis=0)
expected = np.outer(row_totals, col_totals) / n_terms

# Chi-squared statistic: squared residuals scaled by the expected counts
chi_stat = (((data - expected) ** 2) / expected).sum()

# Right-tail probability of the statistic
p_val = stats.chi2.sf(chi_stat, dof)

print(expected)
print(chi_stat)
print(p_val)
print()

[[74.09090909 77.27272727 62.27272727 36.36363636]
 [88.90909091 92.72727273 74.72727273 43.63636364]]
10.128696811826693
0.017503254828611012



#### Test of a single variance

In [53]:
# Test of a single variance, left-tailed
# NOTE(review): the saved output for this cell does not match these inputs
# (they give chi_stat = 24 * 3.5**2 / 7.2**2, about 5.67) - re-run the cell.
n = 25   # sample size
s = 3.5  # sample standard deviation
h = 7.2  # hypothesized population standard deviation

dof = n - 1

# Test statistic: (n - 1) * s^2 / h^2
sample_var = s ** 2
null_var = h ** 2
chi_stat = (dof * sample_var) / null_var

print(chi_stat)

# Left tail: probability of a statistic at least this small
p_val = stats.chi2.cdf(chi_stat, dof)

print(p_val)

0.236304012345679
0.3731116105280825


In [54]:
# Increasing n increases the test statistic but decreases the p-value for both
# left- and right-tailed tests. The statistic and the chi-squared distribution
# both scale with n - 1 (the distribution's mean equals its dof), so a fixed
# ratio s^2 / h^2 != 1 lands further into the tail as n grows.
# NOTE(review): the saved output for this cell does not match these inputs
# (they give chi_stat = 49 * 3.5**2 / 7.2**2, about 11.58) - re-run the cell.

n = 50   # sample size
s = 3.5  # sample standard deviation
h = 7.2  # hypothesized population standard deviation

dof = n - 1

# Test statistic: (n - 1) * s^2 / h^2
sample_var = s ** 2
null_var = h ** 2
chi_stat = (dof * sample_var) / null_var

print(chi_stat)

# Left tail: probability of a statistic at least this small
p_val = stats.chi2.cdf(chi_stat, dof)

print(p_val)

0.945216049382716
0.082013899384776


In [42]:
# Test of a single variance, right-tailed
n = 25   # sample size
s = 7.2  # sample standard deviation
h = 3.5  # hypothesized population standard deviation

dof = n - 1

# Test statistic: (n - 1) * s^2 / h^2
sample_var = s ** 2
null_var = h ** 2
chi_stat = (dof * sample_var) / null_var

print(chi_stat)

# Right tail: probability of a statistic at least this large
p_val = stats.chi2.sf(chi_stat, dof)

print(p_val)

101.56408163265307
1.623621440614767e-11


In [51]:
# Test of a single variance, right-tailed, with a larger sample
n = 50   # sample size
s = 7.2  # sample standard deviation
h = 3.5  # hypothesized population standard deviation

dof = n - 1

# Test statistic: (n - 1) * s^2 / h^2
sample_var = s ** 2
null_var = h ** 2
chi_stat = (dof * sample_var) / null_var

print(chi_stat)

# Right tail: probability of a statistic at least this large
p_val = stats.chi2.sf(chi_stat, dof)

print(p_val)

207.36
2.2435449579324275e-21
