# Confidence intervals

Import Python modules

In [1]:
import matplotlib.pylab as plt
import numpy as np
from scipy import stats

### Normal distribution

In [2]:
# Normal distribution: probability of a range and value at a percentile.
mu, sigma = 36.9, 13.9   # mean and standard deviation of the distribution
x, x2 = 23, 64.7         # lower and upper ends of the range of interest
per = 0.90               # percentile

# P(x < X < x2) is the difference of the CDF evaluated at the two ends.
cdf_at_upper = stats.norm.cdf(x2, mu, sigma)
cdf_at_lower = stats.norm.cdf(x, mu, sigma)
print(cdf_at_upper - cdf_at_lower)
# ppf is the inverse of cdf: the value below which a fraction `per` falls.
print(stats.norm.ppf(per, mu, sigma))
print()

# Confidence interval bounds in SDs
#   note: technically requires a known population SD to apply

# z-scores cutting off 2.5% in each tail of the standard normal
# (multiply by the standard error of the mean to get the error bound);
# the lower-bound z-score is printed first, then the upper-bound one.
for tail_prob in (0.025, 0.975):
    print(stats.norm.ppf(tail_prob, 0, 1))

# or go straight to the bounds with .interval (here a 98% interval)
x_bar = 1.024   # sample mean
sigma = 0.337   # standard deviation (rebinds the sigma used above)
n = 30          # sample size
std_err_mean = sigma / np.sqrt(n)
print(stats.norm.interval(0.98, x_bar, std_err_mean))

0.8185946141203637
54.71356676106994

-1.9599639845400545
1.959963984540054
(0.8808656297226961, 1.1671343702773038)


In [3]:
# Sample size n needed so that the mean's error bound is at most `ebm`
# at the given confidence level: n = (z * sigma / ebm)^2, rounded up.
ebm = 2       # desired error bound for the mean
conf = 0.95   # confidence level
sigma = 15    # standard deviation

alpha = 1 - conf
# Lower-tail z-score, so it is negative; only its square is used below.
z = stats.norm.ppf(alpha / 2, 0, 1)
# Round up: a fractional sample is not possible.
n = np.ceil((z * sigma / ebm) ** 2)
print(n)

217.0


In [4]:
# Confidence interval of a proportion
#  (plain method; the +4 method is not applied here, see below)

n = 500        # sample size
p = 421 / n    # sample proportion (421 successes out of n)
cl = 0.95      # confidence level

alpha = 1 - cl
# z-scores leaving alpha / 2 in each tail of the standard normal
z_lower, z_upper = (
    stats.norm.ppf(tail) for tail in (alpha / 2, 1 - (alpha / 2))
)

# standard error of the proportion
s = np.sqrt(p * (1 - p) / n)

# error bound for the proportion, computed by hand ...
ebp = z_upper * s
print(p - ebp, p + ebp)

# ... and the same interval straight from scipy
print(stats.norm.interval(cl, p, s))

0.8100296288520179 0.873970371147982
(0.8100296288520179, 0.873970371147982)


In [5]:
# Confidence interval of a proportion
#  using the +4 method: add 4 to the sample count and 2 to the successes

n = 25      # number of samples
pn = 6      # number of successes
cl = 0.95   # confidence level

n = n + 4            # +4 method: adjusted number of samples
p = (pn + 2) / n     # +4 method: adjusted proportion of successes

alpha = 1 - cl
# z-scores leaving alpha / 2 in each tail of the standard normal
z_lower, z_upper = (
    stats.norm.ppf(tail) for tail in (alpha / 2, 1 - (alpha / 2))
)

# standard error of the adjusted proportion
s = np.sqrt((p * (1 - p)) / n)

# error bound for the proportion, computed by hand ...
ebp = z_upper * s
print(p - ebp, p + ebp)

# ... and the same interval straight from scipy
print(stats.norm.interval(cl, p, s))

0.11319271756780241 0.43853142036323206
(0.11319271756780241, 0.43853142036323206)


In [6]:
# Sample size n needed so that a proportion's error bound is at most `ebm`
# at the given confidence level.
ebm = 0.03    # desired error bound
conf = 0.90   # confidence level

alpha = 1 - conf
# Lower-tail z-score, so it is negative; only its square is used below.
z = stats.norm.ppf(alpha / 2, 0, 1)
# 0.25 is the largest value p * (1 - p) can take (at p = 0.5); presumably
# used as the conservative choice when the true proportion is unknown.
n = np.ceil(0.25 * (z / ebm) ** 2)
print(n)

752.0


### Student's t-distribution

In [7]:
# Confidence interval for a mean from a small sample, using Student's t.
data = [8.6, 9.4, 7.9, 6.8, 8.3, 7.3, 9.2, 9.6, 8.7, 11.4, 10.3, 5.4, 8.1, 5.5, 6.9]
cl = 0.95   # confidence level

n = len(data)
mu = np.mean(data)          # sample mean
s = np.std(data, ddof=1)    # sample standard deviation (n - 1 divisor)
print(n, mu, s)

alpha = 1 - cl
dof = n - 1                 # degrees of freedom
# t-scores leaving alpha / 2 in each tail
t_lower, t_upper = (
    stats.t.ppf(tail, dof) for tail in (alpha / 2, 1 - (alpha / 2))
)
std_err = s / np.sqrt(n)    # standard error of the mean

# error bound for the mean, computed by hand ...
ebm = t_upper * std_err
print(mu - ebm, mu + ebm)

# ... and the same interval straight from scipy
print(stats.t.interval(1 - alpha, dof, loc=mu, scale=std_err))

15 8.226666666666667 1.6722383060978339
7.300611959652363 9.15272137368097
(7.300611959652363, 9.15272137368097)


In [8]:
# Confidence interval for a mean from a small sample, using Student's t.
data = [
    79, 145, 147, 160, 116, 100, 159, 151, 156, 126,
    137, 83, 156, 94, 121, 144, 123, 114, 139, 99,
]
cl = 0.90   # confidence level

n = len(data)
mu = np.mean(data)          # sample mean
s = np.std(data, ddof=1)    # sample standard deviation (n - 1 divisor)
print(n, mu, s)

alpha = 1 - cl
dof = n - 1                 # degrees of freedom
# t-scores leaving alpha / 2 in each tail
t_lower, t_upper = (
    stats.t.ppf(tail, dof) for tail in (alpha / 2, 1 - alpha / 2)
)
std_err = s / np.sqrt(n)    # standard error of the mean

# error bound for the mean, computed by hand ...
ebm = t_upper * std_err
print(mu - ebm, mu + ebm)

# ... and the same interval straight from scipy
print(stats.t.interval(cl, dof, loc=mu, scale=std_err))

20 127.45 25.964500055997508
117.41093378346815 137.48906621653185
(117.41093378346815, 137.48906621653185)
