In [1]:
# Learning the t-distribution
# t = observed difference between sample mean / standard error of the difference between the means
# t = (mean(X1) - mean(X2)) / sed
# where sed = sqrt(se1^2 + se2^2) and se = std / sqrt(n)
# Assumtions: Samples drawn from Gaussian Distribution
#             Size of each sample is approximately equal
#             Samples have same variance

In [2]:
import numpy as np
import scipy as sp
from scipy import stats

In [3]:
def independent_t_test(data1, data2, alpha):
    mean1, mean2 = np.mean(data1), np.mean(data2)
    se1, se2 = sp.stats.sem(data1), sp.stats.sem(data2)
    sed = np.sqrt(se1**2.0+se2**2.0)
    t_stat = (mean1-mean2)/sed
    df = len(data1)+len(data2)-2
    cv = stats.t.ppf(1.0-alpha, df)
    p = (1.0-stats.t.cdf(abs(t_stat),df))*2.0
    return t_stat, df, cv, p

In [4]:
# Generate some data
data1 = 5*np.random.randn(100)+50
data2 = 5*np.random.randn(100)+51

In [6]:
stat, p = stats.ttest_ind(data1, data2)
print('t = {:.3f}, p = {:.3f}'.format(stat, p))

t = -2.585, p = 0.010


In [5]:
alpha = 0.05
t_stat, df, cv, p = independent_t_test(data1,data2,0.05)
print('t_stat = {:.3f}, df = {}, cv = {:.3f}, p = {:.3f}'.format(t_stat, df, cv, p))

t_stat = -2.585, df = 198, cv = 1.653, p = 0.010


In [7]:
if abs(t_stat)<=cv:
    print('Accept null hypothesis that the means are equal')
else:
    print('Reject null hypotehsis that the means are equal')

if p>alpha:
    print('Accept null hypothesis that the means are equal')
else:
    print('Reject null hypothesis that the means are equal')

Reject null hypotehsis that the means are equal
Reject null hypothesis that the means are equal
