# Independent t-test with confidence intervals in Python 

In [1]:
import pandas as pd
from scipy import stats
from math import sqrt
from scipy.stats import t

In [2]:
# Load the pork/beef dataset; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="datasets/PorkBeef.csv")

In [3]:
# Preview the first five rows to check which columns were loaded.
data.head(n=5)

Unnamed: 0,names,value
0,pork,24
1,pork,124
2,pork,304
3,pork,446
4,pork,13


In [5]:
# Independent two-sample t-test of beef values against pork values.
# scipy's default (equal_var=True) is the pooled-variance Student's test.
ind_t_test = stats.ttest_ind(
    data.loc[data["names"] == "beef", "value"],
    data.loc[data["names"] == "pork", "value"],
)

In [7]:
# Calculate the mean difference and 95% confidence interval
# (pooled-variance interval for the difference beef - pork).
beef_values = data[data["names"] == 'beef']["value"]
pork_values = data[data["names"] == 'pork']["value"]

# Group sizes are derived from the data instead of being hard-coded, so the
# interval stays correct if the dataset changes. count() ignores NaN, which
# matches how .std() and .mean() treat missing values below.
N1 = beef_values.count()
N2 = pork_values.count()
assert N1 > 1 and N2 > 1, "each group needs at least two observations"

df = (N1 + N2 - 2)        # degrees of freedom of the pooled estimate
std1 = beef_values.std()  # pandas default ddof=1 -> sample standard deviation
std2 = pork_values.std()
# Pooled standard deviation: per-group variances weighted by (n - 1).
std_N1N2 = sqrt(((N1 - 1)*(std1)**2 + (N2 - 1)*(std2)**2) / df)

diff_mean = beef_values.mean() - pork_values.mean()
# Margin of error = two-sided 95% critical t value * standard error of the difference.
MoE = t.ppf(0.975, df) * std_N1N2 * sqrt(1/N1 + 1/N2)

print('The results of the independent t-test are: \n\tt-value = {:4.3f}\n\tp-value = {:4.3f}'.format(ind_t_test.statistic, ind_t_test.pvalue))
print('\nThe difference between groups is {:3.1f} [{:3.1f} to {:3.1f}] (mean [95% CI])'.format(diff_mean, diff_mean - MoE, diff_mean + MoE))

The results of the independent t-test are: 
	t-value = 2.377
	p-value = 0.021

The difference between groups is 91.6 [14.5 to 168.7] (mean [95% CI])
