In [69]:
import numpy as np
import scipy.stats as ss

# Test for equality of means (small sample): t-test

## Example 1: Equal Variance case

In [71]:
from scipy.stats import ttest_ind
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [2.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]


In [95]:
np.std(data1,ddof=1)

1.4568542556557338

In [96]:
np.std(data2,ddof=1)

0.832984520331027

In [74]:
k=np.std(data1,ddof=1)/np.std(data2,ddof=1) 
print(k)
if k<=2 and k>=0.5:
    print("Use equal variance case")
else:
    print("Use unequal variance case")

1.4009243817004382
Use equal variance case


In [97]:
stat, p = ss.ttest_ind(data1, data2,equal_var=True)
p

0.7484698873615687

### Note:
#Above procedure is for two tailed test only. #If it is one tailed test (ower/upper) use (p-value/2) and compare with alpha.

## Example 2: Unequal Variance case

In [98]:
from scipy.stats import ttest_ind
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -3.478, -1.637, -2.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]


In [99]:
np.std(data1,ddof=1)

1.852998138153409

In [100]:
np.std(data2,ddof=1)

0.832984520331027

In [101]:
k=np.std(data1,ddof=1)/np.std(data2,ddof=1) 
print(k)
if k<=2 and k>=0.5:
    print("Use equal variance case")
else:
    print("Use unequal variance case")


2.2245288993089933
Use unequal variance case


In [102]:
stat, p = ss.ttest_ind(data1, data2,equal_var=False)
p

0.4753636615288518

# Example 3: paired t-Test

In [103]:
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]


In [105]:
stat, p = ss.ttest_rel(data1, data2)
p

0.7459078283577478

### Note:
#Above procedure is for two tailed test only. #If it is one tailed test (ower/upper) use (p-value/2) and compare with alpha.

# CSV File

In [107]:
import pandas as pd
data = pd.read_csv('brain_size.csv', sep=';', na_values=".")
#data  

# One Sample t-test

#### H0: mu=110
#### H1: mu!=110

In [108]:
ss.ttest_1samp(data['VIQ'], 150)

Ttest_1sampResult(statistic=-10.082928027614397, pvalue=2.0242281047081968e-12)

# Two Sample t-test (.csv file)

#### H0: mu1=mu2
#### H1: mu1=mu2

In [86]:
female_viq = data[data['Gender'] == 'Female']['VIQ']
male_viq = data[data['Gender'] == 'Male']['VIQ']

In [45]:
np.std(female_viq,ddof=1),np.std(male_viq,ddof=1)

(21.670924008182507, 25.640993083650045)

In [109]:
#data

In [110]:
#data.head(11)

In [111]:
#data.describe(include='all')

In [112]:
k=np.std(female_viq,ddof=1)/np.std(male_viq,ddof=1)
print(k)
if k<=2 and k>=0.5:
    print("Use equal variance case")
else:
    print("Use unequal variance case")

0.8451671094596157
Use equal variance case


In [115]:
t,p=ss.ttest_ind(female_viq, male_viq,equal_var=True) 
p

0.44452876778583217

# Equality of Means: t-Test (.csv file)

In [48]:
t,p=ss.ttest_ind(data['FSIQ'], data['PIQ'])
p

0.6427725009414841

# Equality of Means: z-Test (.csv file)

In [64]:
import statsmodels.stats.weightstats as stests

In [65]:
ztest ,pval = stests.ztest(data['FSIQ'], data['PIQ'],value=0,alternative='two-sided')
pval

0.6414749271063565

# paired t-Test (.csv file)

In [116]:
t,p=ss.ttest_rel(data['FSIQ'], data['PIQ'])
p

0.08217263818364236

##### Also, instead of pair t-test we can use single sample test as follows

In [22]:
t,p=ss.ttest_1samp(data['FSIQ'] - data['PIQ'], 0)
p

0.08217263818364236

# Chi-square test of independance

In [118]:
from scipy.stats import chi2_contingency
from scipy.stats import chi2
# contingency table
table = [	[10, 20, 30],
			[6,  9,  17]]
print(table)
stat, p, dof, expected = chi2_contingency(table)
print('dof=%d' % dof)
print(expected)
# interpret test-statistic
prob = 0.95
critical = chi2.ppf(prob, dof)
print('probability=%.3f, critical=%.3f, stat=%.3f' % (prob, critical, stat))
if abs(stat) >= critical:
	print('Dependent (reject H0)')
else:
	print('Independent (fail to reject H0)')
# interpret p-value
alpha = 1.0 - prob
print('significance=%.3f, p=%.3f' % (alpha, p))
if p <= alpha:
	print('Dependent (reject H0)')
else:
	print('Independent (fail to reject H0)')

[[10, 20, 30], [6, 9, 17]]
dof=2
[[10.43478261 18.91304348 30.65217391]
 [ 5.56521739 10.08695652 16.34782609]]
probability=0.950, critical=5.991, stat=0.272
Independent (fail to reject H0)
significance=0.050, p=0.873
Independent (fail to reject H0)


# Chi-square Goodness of fit Test

In [119]:
import scipy.stats as ss

observed_values=np.array([18,21,16,7,15])
expected_values=np.array([22,19,44,8,16])

ss.chisquare(observed_values, f_exp=expected_values)

Power_divergenceResult(statistic=18.94348086124402, pvalue=0.0008062955548480186)

In [67]:
chi=ss.chisquare(data['FSIQ'],ddof=1)

In [68]:
chi

Power_divergenceResult(statistic=199.36447774349932, pvalue=9.176691820721271e-24)