## Confidence intervals

In [2]:
import statsmodels.api as sm

In [5]:
from statsmodels.stats.proportion import proportion_confint

In [6]:
proportion_confint( count =  310, #number of successes
                    nobs = 1126, # number of trials
                    alpha = 1 - 0.95) #.05 CI

(0.24922129423231776, 0.30140037539468045)

In [7]:
proportion_confint(310, 1126, alpha=(1-.99))

(0.24102336643386685, 0.30959830319313136)

## Testing proportions

In [9]:
from statsmodels.stats.proportion import proportions_ztest


In [14]:
res = proportions_ztest(count=310, 
                      nobs = 1126,
                      value = 0.3, # The hypothesized value of population proportion p
                      alternative = 'two-sided') # Test the 'not equal to' alternative hypothesis

In [16]:
res #The first entry is the value of the test statistics, and the second is the p-value

(-1.8547614674673856, 0.06363029677684083)

## Testing common proporitons

In [17]:
import numpy as np


In [18]:
np.array([100, 144])

array([100, 144])

In [20]:
proportions_ztest(count = np.array([100, 144]),
                 nobs = np.array([516, 510]), 
                 alternative = 'two-sided')

(-3.3311633021935454, 0.0008648385153047968)

## chi-square test

In [21]:
from scipy.stats import chisquare

In [23]:
chisquare(f_obs = [98, 117, 80, 73, 96], # Observed frequency for each year
         f_exp= [464*.2, 464*.2, 464*.2, 464*.2, 464*.2]) # E xpected frequency under the null hypothesis

Power_divergenceResult(statistic=12.70258620689655, pvalue=0.012824256359695625)

## Confidence interval for the mean

In [24]:
res

(-1.8547614674673856, 0.06363029677684083)

In [25]:
res = np.array([.23, .34, .45, .56, .56, .67])

In [26]:
res.mean()

0.4683333333333333

In [27]:
from statsmodels.stats.weightstats import _tconfint_generic

In [28]:
_tconfint_generic(mean=res.std(), # The mean of the data
                  std_mean= res.std()/np.sqrt(len(res)), # The standard deviation deviation of the mean (s/sqrt(n)
                 dof = len(res) - 1, # df=n-1
                 alpha = (1 - 0.95), # 1-CI
                 alternative='two-sided')
# 1 is not in the CI

(-0.0073069860235879525, 0.30292310346120144)

## Hypothesis test for the mean

In [29]:
from statsmodels.stats.weightstats import _tstat_generic

In [33]:
_tstat_generic(value1=res.mean(), # The mean of the dataset
              value2=0,
              diff=1, # The mean under the null hypothesis
              std_diff = res.std()/np.sqrt(len(res)), #std of mean
              dof = len(res) - 1, # The degrees of freedom
              alternative='smaller') # The direction of the alternative(the true mean is smaller than 1)

(-8.810832493627899, 0.00015633023817394613)

## Two sample test for common mean

In [34]:
res_A = np.array([1.32, 3.23, 1.23, 0.43, 0.131])
res_B = np.array([.01, .23, .99, 1.23, 2.24])

In [36]:
from statsmodels.stats.weightstats import ttest_ind

In [38]:
ttest_ind(res_A, res_B, # The datasets
         alternative='two-sided',
         usevar = 'unequal')

(0.4894092668824881, 0.6388444013237312, 7.337317640693181)

## One way ANOVA

In [39]:
from scipy.stats import f_oneway

In [40]:
f_oneway(res_A, res_B)

F_onewayResult(statistic=0.23952143051045477, pvalue=0.6376882473468715)