# Confidence interval

In [1]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
%matplotlib inline
import random
from tqdm import tqdm
import pandas as pd
plt.style.use('ggplot')

### Normal distribution (variance is known)

In [2]:
population = stats.norm.rvs(loc = 2, scale = 5, size = 100000) # fix some normal disctribution

In [3]:
sample_size = 100
sample = np.random.choice(a = population, size = sample_size) # get some sample from this discrubution

In [4]:
sample_mean = sample.mean() # get sample mean
st_dev = population.std() # get sample variace

In [5]:
z_value = stats.norm.ppf(q = 0.975) # get right z-value
print('right z-value:', z_value)

right z-value: 1.959963984540054


In [6]:
z_value = stats.norm.ppf(q = 0.025) # get left z-value
print('left z-value:', z_value)

left z-value: -1.9599639845400545


In [7]:
interval = z_value * (st_dev/np.sqrt(sample_size))
conf_inv = (sample_mean - interval, sample_mean + interval) # get confidence interval
print('Confidence interval:', conf_inv)

Confidence interval: (2.9176708022662847, 0.9597811829791674)


In [8]:
# add confidence interval computing into one function
def comp_ci(sample, st_dev): 
    z_value = stats.norm.ppf(q = 0.975)
    sample_size = len(sample)
    interval = z_value * (st_dev/np.sqrt(sample_size))
    conf_inv = (sample_mean - interval, sample_mean + interval)
    return conf_inv

In [10]:
np.random.seed(5)
sample_size = 2000
sample = np.random.choice(a = population, size = sample_size)

ci = comp_ci(sample, st_dev) # get confidence interval for other sample

print('conf interval for 2000 sample size:', ci)

conf interval for 2000 sample size: (1.7198272785643671, 2.157624706681085)


### Student's distribution (variance is unknown)

In [11]:
def compute_ci_t(sample, alpha = 0.95):
    
    n = sample.shape[0]
    mu, se = np.mean(sample), stats.sem(sample)
    bound = se * stats.t.ppf((1 + alpha)/2., n-1)
    
    return mu - bound, mu + bound

In [12]:
sample = np.random.choice(a = population, size = 30)
ci_t = compute_ci_t(sample, alpha = 0.95)
print('conf interval with t test for 2000 sample size:', ci_t)

conf interval with t test for 2000 sample size: (-0.6968230098681322, 2.5538371493474674)
