# Independent Samples T Test

In [1]:
# Import pandas, numpy, scipy.stats
import pandas as pd
import numpy as np
from scipy import stats

In [2]:
# Body-temperature samples for the independent-samples t-test:
# nine readings for men and nine for women (two separate groups, not paired).
men = [96.9, 97.4, 97.5, 97.8, 97.8, 97.9, 98, 98.6, 98.8]
women = [97.8, 98, 98.2, 98.2, 98.2, 98.6, 98.8, 99.2, 99.4]

In [None]:
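# H0: mu1 = mu2   (mu1 = mean body temperature of men, mu2 = mean body temperature of women)
# H1: mu1 != mu2  (two-sided alternative: the two means differ)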

In [3]:
# Levene test for equality of variances (a precondition check for the t-test).
#   H0: the two population variances are equal
#   H1: the two population variances differ
# How to read the p-value:
#   small p-value -> reject H0 -> treat variances as unequal (use equal_var=False)
#   large p-value -> fail to reject H0 -> treat variances as equal (use equal_var=True)
# center="median" is SciPy's default, spelled out here for clarity.
leveneTest = stats.levene(men, women, center="median")
leveneTest

LeveneResult(statistic=0.0031923383878696503, pvalue=0.9556424422138867)

In [4]:
# Sample mean of the men's readings
np.asarray(men).mean()

97.85555555555555

In [5]:
# Sample mean of the women's readings
np.asarray(women).mean()

98.48888888888888

Calculate the T-test for the means of two independent samples of scores.

In [None]:
#H0: mu1 = mu2
#H1: mu1 != mu2

In [None]:
#help(stats.ttest_ind)

In [6]:
# Independent two-sample t-test on mean body temperature (men vs. women).
# The variant is chosen from the Levene result computed earlier instead of being
# hard-coded, so the cell stays correct if the samples change:
#   Levene p >= 0.05 -> no evidence of unequal variances -> pooled-variance t-test
#   Levene p <  0.05 -> variances differ                 -> Welch's t-test (equal_var=False)
equal_var = bool(leveneTest.pvalue >= 0.05)
indTest = stats.ttest_ind(men, women, equal_var=equal_var)
indTest

Ttest_indResult(statistic=-2.3724271468993643, pvalue=0.03054788637798765)

In [8]:
# t statistic of the test above; its sign follows mean(men) - mean(women), hence negative here
indTest.statistic     # t score

-2.3724271468993643

In [9]:
# p-value of the t-test above; it is compared against alpha in the decision cell
indTest.pvalue    # p value

0.03054788637798765

In [10]:
# Decision rule: reject H0 only when the p-value is below the significance level.
alpha = 0.05

print(
    "Reject the null. At the α=0.05, we have sufficient evidence to indicate that mean body temperatures differ for men and women. "
    if indTest.pvalue < alpha
    else "Fail to reject the null."
)

Reject the null. At the α=0.05, we have sufficient evidence to indicate that mean body temperatures differ for men and women. 


# Paired (Dependent) Samples T Test

## Example 2

In [11]:
# Strength predictions from the Karlsruhe and Lehigh methods, nine values each.
# NOTE(review): the paired test below assumes karlsruhe[i] and lehigh[i] describe
# the same item -- confirm against the data source.
karlsruhe = [1.186, 1.151, 1.322, 1.339, 1.2, 1.402, 1.365, 1.537, 1.559]
lehigh = [1.061, 0.992, 1.063, 1.062, 1.065, 1.178, 1.037, 1.086, 1.052]

In [None]:
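# H0: mu_d = 0   (the mean of the paired differences karlsruhe - lehigh is zero, i.e. mu1 = mu2)
# H1: mu_d != 0  (the mean paired difference is not zero, i.e. mu1 != mu2)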

In [12]:
# Paired (dependent) samples t-test: works on the element-wise differences
# between the two lists, so both must have the same length and ordering.
pairedtest = stats.ttest_rel(
    karlsruhe,
    lehigh,
    alternative="two-sided",
)
pairedtest

TtestResult(statistic=6.0819394375848255, pvalue=0.00029529546278604066, df=8)

In [13]:
# Decision for the two-sided paired test.
alpha = 0.05

if pairedtest.pvalue < alpha:
    # A two-sided rejection only says the means differ. The direction comes from
    # the sign of the t statistic, which is based on the mean of (karlsruhe - lehigh).
    if pairedtest.statistic > 0:
        print("Reject the Null. The data indicates that the Karlsruhe method produces, on the average, higher strength predictions than does the Lehigh method")
    else:
        print("Reject the Null. The data indicates that the Karlsruhe method produces, on the average, lower strength predictions than does the Lehigh method")
else:
    print("Fail to reject")

Reject the Null. The data indicates that the Karlsruhe method produces, on the average, higher strength predictions than does the Lehigh method


# ANOVA

## Example 3

In [None]:
#!pip install statsmodels

In [14]:
import statsmodels.api as sm

In [15]:
# Load the "Cushings" dataset of R's MASS package through statsmodels.
# NOTE: at this point `df` is the statsmodels dataset wrapper, not a DataFrame;
# the following cell rebinds it to the wrapper's `.data` table.
df = sm.datasets.get_rdataset("Cushings", package="MASS")
df.keys()

dict_keys(['data', '__doc__', 'package', 'title', 'from_cache'])

In [16]:
# `df` arrives here as the statsmodels dataset wrapper; keep only its DataFrame.
# The isinstance guard makes the cell safe to re-run: without it, a second run
# would look up `.data` on the DataFrame itself and raise AttributeError.
if not isinstance(df, pd.DataFrame):
    df = df.data
df.head(10)

Unnamed: 0_level_0,Tetrahydrocortisone,Pregnanetriol,Type
rownames,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a1,3.1,11.7,a
a2,3.0,1.3,a
a3,1.9,0.1,a
a4,3.8,0.04,a
a5,4.1,1.1,a
a6,1.9,0.4,a
b1,8.3,1.0,b
b2,3.8,0.2,b
b3,3.9,0.6,b
b4,7.8,1.2,b


In [None]:
# H0: μa = μb = μc = μu  (mean Tetrahydrocortisone is the same for every Type: a, b, c, u)
# Ha: at least one group mean differs from the others

In [17]:
# (rows, columns) of the Cushings data frame
df.shape

(27, 3)

In [23]:
# Distinct group labels in the Type column (these define the ANOVA groups)
df["Type"].unique()

array(['a', 'b', 'c', 'u'], dtype=object)

In [19]:
from scipy.stats import f_oneway

In [20]:
# Tetrahydrocortisone values for the rows whose Type is "a"
df.loc[df["Type"] == "a", "Tetrahydrocortisone"]

rownames
a1    3.1
a2    3.0
a3    1.9
a4    3.8
a5    4.1
a6    1.9
Name: Tetrahydrocortisone, dtype: float64

In [24]:
# One-way ANOVA on Tetrahydrocortisone across the four Type groups.
# Each group's values are selected with the same mask pattern and passed to
# f_oneway as separate samples, in the order a, b, c, u.
anova_test = stats.f_oneway(
    *(
        df.loc[df["Type"] == label, "Tetrahydrocortisone"]
        for label in ("a", "b", "c", "u")
    )
)
anova_test

F_onewayResult(statistic=3.2257394791378426, pvalue=0.0412182793672776)

In [25]:
# Decision rule: reject H0 only when the ANOVA p-value is below the significance level.
alpha = 0.05

print(
    "We can reject H0 at 0.05 significance level and conclude that the differences among group means are statistically significant"
    if anova_test.pvalue < alpha
    else "Fail to reject"
)

We can reject H0 at 0.05 significance level and conclude that the differences among group means are statistically significant


END OF THE PROJECT