# Sample size

In [75]:
import numpy as np
import math
from scipy import stats

## Margin of error

s : standard deviation of the population  
n : sample size  
z : z-score of the confidence level (number of standard deviations from the mean)  

$ \large e = z \frac{s}{\sqrt{n}} $

In [91]:
def error_margin(z, s, n):
    """Return the margin of error ``z * s / sqrt(n)``.

    Parameters
    ----------
    z : z-score of the chosen confidence level (e.g. 1.96 for 95%).
    s : standard deviation.
    n : sample size; must be positive because its square root is used
        as a divisor.

    Returns
    -------
    The half-width of the confidence interval, in the same units as ``s``.
    """
    # Standard error first, then scale it by the z-score.
    standard_error = s / math.sqrt(n)
    return z * standard_error

error_margin(1.96, 1, 100)

0.196

## Z-score of CI

Z-scores for confidence levels:

80%  1.28   
85%  1.44   
90%  1.65    
95%  1.96  
99%  2.58  

 ![](zscore.png)

In [28]:
# From z-score to confidence level: take the probability mass beyond
# z = 1.96 in one tail, then remove both tails from the total of 1.
tail = 1 - stats.norm.cdf(1.96)
ci = 1 - 2 * tail
ci

0.950004209703559

In [31]:
# From confidence level to z-score: split the mass outside the interval
# evenly between the two tails and invert the normal CDF.
ci = 0.95
tail = 0.5 * (1 - ci)
-stats.norm.ppf(tail)  # ppf of the lower tail is negative, so flip the sign

1.959963984540054

## Sample size formula

p : (estimated) proportion of the population that has the attribute in question  
n : population size  
e : margin of error  
z : z-score of the confidence level (number of standard deviations from the mean)  

  
$ \large \text{sample size} = \frac{\frac{z^2 p(1-p)}{e^2}} {1 + \frac{z^2 p(1-p)}{e^2 n}} $
  

In [71]:
def sample_size(z, p, e, n):
    """Sample size needed to estimate a proportion in a finite population.

    Evaluates ``(z^2 * p * (1 - p) / e^2) / (1 + z^2 * p * (1 - p) / (e^2 * n))``.

    Parameters
    ----------
    z : z-score of the chosen confidence level (e.g. 1.96 for 95%).
    p : estimated proportion of the population with the attribute
        (expected to lie in [0, 1]; not validated here).
    e : margin of error as a fraction (e.g. 0.03 for 3%); must be non-zero.
    n : population size; must be non-zero.

    Returns
    -------
    The required sample size as a float (not rounded).
    """
    # Shared term of numerator and denominator: z^2 * p * (1 - p).
    spread = z**2 * p * (1 - p)
    unbounded = spread / e**2                 # size for an unlimited population
    correction = 1 + spread / (e**2 * n)      # shrinks it for a finite population
    return unbounded / correction

sample_size(1.96, 0.5, 0.03, 100)

91.43183549124143

#### Cochran's formula (for large populations)

In [69]:
def cochran(z, p, e):
    """Cochran's formula: sample size for a proportion in a large population.

    Evaluates ``z^2 * p * (1 - p) / e^2`` with no finite-population
    correction.

    Parameters
    ----------
    z : z-score of the chosen confidence level (e.g. 1.96 for 95%).
    p : estimated proportion of the population with the attribute
        (expected to lie in [0, 1]; not validated here).
    e : margin of error as a fraction (e.g. 0.03 for 3%); must be non-zero.

    Returns
    -------
    The required sample size as a float (not rounded).
    """
    return (z**2 * p * (1 - p)) / e**2


# Backward-compatible alias: the function was first defined under the
# misspelled name ``crochan``, so existing calls keep working.
crochan = cochran

cochran(1.96, 0.5, 0.03)

1067.111111111111

## Credits & Links

https://www.surveymonkey.com/mp/sample-size-calculator/  
https://www.statisticshowto.com/probability-and-statistics/find-sample-size/  
https://www.investopedia.com/terms/z/zscore.asp  
https://www.statology.org/z-score-python/  
https://www.surveymonkey.com/mp/margin-of-error-calculator/