In [1]:
from scipy import stats

In [2]:
stats.t.ppf(1-0.025, 21)

2.079613844727662

In [3]:
#tt = (sm-m)/np.sqrt(sv/float(n))  # t-statistic for mean
#pval = stats.t.sf(np.abs(tt), n-1)*2  # two-sided pvalue = Prob(abs(t)>tt)
#print('t-statistic = %6.3f pvalue = %6.4f' % (tt, pval))
#t-statistic =  0.391 pvalue = 0.6955

stats.t.ppf(1-0.05,43)

1.840633704139501e-19

In [5]:
stats.t.ppf(1-0.05,43)

1.681070701847763

In [30]:
import numpy as np

def ind_sample_t_test(x, y, alpha, mu_x, mu_y, side=None):
    '''
    Inputs:
    x: input list of numbers
    y: input list of numbers
    alpha: significance level
    mu_x: population mean for x
    mu_y: population mean for y
    side: can be "R" for right tailed test
          can be "L" for left tailed test
          can be None for 2 tailed test
          
    Output:
    dictionary: {'side':side, 'df':df, 'alpha':alpha, 't_critical_value':t_critical_values,\
           't_statistic': t_statistic, 'std_err': se, 'pooled_var':pooled_var}
    '''
    x_numpy_list = np.array(x)
    y_numpy_list = np.array(y)
    
    x_bar = np.mean(x_numpy_list)
    y_bar = np.mean(y_numpy_list)
    
    #Find degrees of freedom
    df = len(x) + len(y) - 2
    
    if side == None:
        critical_probability = 1 - (alpha/2)      
    elif side == "R":
        critical_probability = 1-alpha
    elif side == "L":
        critical_probability = alpha
    else:
        print('Error. You must supply side="L" or side = "R" or side = None. But you supplied side as {}'.format(side))
    
    #Find the critical value for the supplied significance level
    t_critical = stats.t.ppf(critical_probability,df)     
    
    if side == None:
        t_critical_values = [-1*t_critical, t_critical]
    if side == "R":
        t_critical_values = [None, t_critical]
    if side == "L":
        t_critical_values = [t_critical, None]

    #print("t_critical value for alpha({}) is {}".format(alpha,t_critical_values))
    #Find the critical value
    
    #Pooled variance:
    pooled_var =    (np.sum((x_numpy_list - x_bar)**2) + 
                    np.sum((y_numpy_list - y_bar)**2))/df
    
    #Corrected std. error
    se = np.sqrt(pooled_var/len(x) + pooled_var/len(y))    
    
    #print("pooled variance {}".format(pooled_var))
    #print("x_bar: {} y_bar: {}".format(x_bar, y_bar))
    
    t_statistic = ((x_bar - y_bar) - (mu_x - mu_y))/ se
    #print(t_statistic)
    
    #Finding p-value of the t_statistic
    if side == None:
        p_value = stats.t.sf(np.abs(t_statistic), df)*2
    else:
        p_value = stats.t.sf(np.abs(t_statistic), df)
    #print("p_value: ", p_value)
    return {'side':side, 'df':df, 'alpha':alpha, 't_critical_value':t_critical_values,\
           't_statistic': t_statistic, 'std_err': se, 'pooled_var':pooled_var}

In [29]:
ind_sample_t_test([5,6,1,-4], [3,7,8], 0.05, 0, 0)

{'side': None,
 'df': 5,
 'alpha': 0.05,
 't_critical_value': [-2.5705818366147395, 2.5705818366147395],
 't_statistic': -1.343320988518935,
 'std_err': 2.9776948578836393,
 'pooled_var': 15.2}

In [39]:
def ind_sample_t_test_means(x_bar, y_bar,nx, ny, pooled_var, alpha, mu_x, mu_y, side=None):
    '''
    Inputs:
    x_bar: mean of x group
    y_bar: mean of y group
    alpha: significance level
    mu_x: population mean for x
    mu_y: population mean for y
    side: can be "R" for right tailed test
          can be "L" for left tailed test
          can be None for 2 tailed test
          
    Output:
    dictionary: {'side':side, 'df':df, 'alpha':alpha, 't_critical_value':t_critical_values,\
           't_statistic': t_statistic, 'std_err': se, 'pooled_var':pooled_var}
    '''    
    #Find degrees of freedom
    df = nx + ny - 2
    
    if side == None:
        critical_probability = 1 - (alpha/2)      
    elif side == "R":
        critical_probability = 1-alpha
    elif side == "L":
        critical_probability = alpha
    else:
        print('Error. You must supply side="L" or side = "R" or side = None. But you supplied side as {}'.format(side))
    
    #Find the critical value for the supplied significance level
    t_critical = stats.t.ppf(critical_probability,df)     
    
    if side == None:
        t_critical_values = [-1*t_critical, t_critical]
    if side == "R":
        t_critical_values = [None, t_critical]
    if side == "L":
        t_critical_values = [t_critical, None]

    #print("t_critical value for alpha({}) is {}".format(alpha,t_critical_values))
    #Find the critical value
        
    #Corrected std. error
    se = np.sqrt(pooled_var/nx + pooled_var/ny)    
    
    #print("pooled variance {}".format(pooled_var))
    #print("x_bar: {} y_bar: {}".format(x_bar, y_bar))
    
    t_statistic = ((x_bar - y_bar) - (mu_x - mu_y))/ se
    #print(t_statistic)
    
    #Finding p-value of the t_statistic
    if side == None:
        p_value = stats.t.sf(np.abs(t_statistic), df)*2
    else:
        p_value = stats.t.sf(np.abs(t_statistic), df)
    #print("p_value: ", p_value)
    return {'side':side, 'df':df, 'alpha':alpha, 't_critical_value':t_critical_values,\
           't_statistic': t_statistic, 'std_err': se, 'pooled_var':pooled_var, 'p_value': p_value}

In [40]:
ind_sample_t_test_means(x_bar=3.8, y_bar=2.1, pooled_var = .13,nx = 18, ny = 25, alpha=.05, mu_x=0, mu_y=0, side="R")

{'side': 'R',
 'df': 41,
 'alpha': 0.05,
 't_critical_value': [None, 1.6828780004112913],
 't_statistic': 15.252789416141766,
 'std_err': 0.11145502331533659,
 'pooled_var': 0.13,
 'p_value': 8.413848547213937e-19}

In [42]:
ind_sample_t_test_means(x_bar=12, y_bar=8, pooled_var = 5.1,nx = 52, ny = 57, alpha=.05, mu_x=6, mu_y=3, side=None)

{'side': None,
 'df': 107,
 'alpha': 0.05,
 't_critical_value': [-1.9823833701230174, 1.9823833701230174],
 't_statistic': 2.3090894797690584,
 'std_err': 0.4330711342117475,
 'pooled_var': 5.1,
 'p_value': 0.022859587257110004}

### Question

Two populations x and y have means of $\mu_x = 6$ and $\mu_y = 3$. So the difference between $\mu_x - \mu_y = 3$. A change has been introduced to $x$, and we want to test the hypothesis that $\mu_x \ne \mu_y$. We took a sample of size 52 from $x$ and 57 from $y$. The mean values of sample $\bar{x}$ and $\bar{y}$ are 12 and 8 respectively. Based on this, define the null and alternate hypothesis, and perform hypothesis testing. Consider the pooled variance as 5.1

$$H_0: \mu_x - \mu_y = 3$$
$$H_1: \mu_x - \mu_y \ne 3$$

$$\alpha = 0.05$$


In [46]:
se = np.sqrt(5.1/52 + 5.1/57)

t_statistic = ((12 - 8) - (3))/se

df = 52+57 - 2

#p_value of t_statistic:

print("p_value: ", stats.t.sf(t_statistic, df))
print("t_statistic: ",t_statistic)
print("t_critical values: ", stats.t.ppf(0.025, df), ",",stats.t.ppf(1-0.025, df))



p_value:  0.011429793628555002
t_statistic:  2.3090894797690584
t_critical values:  -1.9823833701230178 , 1.9823833701230174


The obtained p-value of 0.011 is less than 1.98 (on the right tail). Hence we will reject the null hypothesis.