In [1]:
import pandas as pd
import numpy as np

from statsmodels.stats.weightstats import ttest_ind as ttest_ind_sm
from statsmodels.stats.weightstats import DescrStatsW, CompareMeans
from statsmodels.stats.power import TTestIndPower
from scipy.stats import ttest_ind, t

In [2]:
np.random.seed(1869)

In [3]:
df_simple_example = pd.read_csv("./Data/hypothesis_test_example.csv")

In [5]:
df_simple_example.head(10)

Unnamed: 0,x_A,x_B
0,2.035733,-0.171019
1,1.105017,0.65591
2,-0.745565,0.994498
3,1.8823,1.146082
4,-0.369233,0.317476
5,0.806429,1.124682
6,0.320897,-0.413721
7,0.910782,0.326461
8,1.141452,0.30949
9,1.803123,0.538578


In [6]:
df_simple_example.describe()

Unnamed: 0,x_A,x_B
count,100.0,100.0
mean,0.529,0.176
std,0.93847,0.983872
min,-1.036634,-1.95743
25%,-0.241617,-0.44883
50%,0.544784,0.250698
75%,1.108813,0.676324
max,3.126451,2.612088


In [7]:
ttest_ind(df_simple_example["x_A"], df_simple_example["x_B"])

TtestResult(statistic=2.5961983095998966, pvalue=0.010132851609223453, df=198.0)

In [8]:
ttest_ind_sm(df_simple_example['x_A'], df_simple_example['x_B'])

(2.596198309599896, 0.010132851609223469, 198.0)

In [9]:
p_value1 = 2.0*(1.0 - t.cdf(2.5961983095998953, 198))
p_value2 = 2.0*t.cdf(-2.5961983095998953, 198)

print("p-value estimate from right-hand tail of PDF = ", p_value1)
print("p-value estimate from left-hand tail of PDF = ", p_value2)

p-value estimate from right-hand tail of PDF =  0.010132851609223614
p-value estimate from left-hand tail of PDF =  0.01013285160922349


In [13]:
n_permutations = 100000

x_All = np.concatenate((df_simple_example["x_A"].to_numpy(), df_simple_example["x_B"].to_numpy()))

nA = df_simple_example.shape[0]
nB = nA

A_indices = np.arange(0, nA)
B_indices = np.arange(nA, (nA+nB))

m_A = np.mean(x_All[A_indices])
m_B = np.mean(x_All[B_indices])

s2_A = np.var(x_All[A_indices], ddof=1)
s2_B = np.var(x_All[B_indices], ddof=1)

sigma2_observed = (((nA-1)*s2_A) + ((nB-1)*s2_B))/(nA+nB-2)
t_observed = (m_A - m_B) / (np.sqrt(sigma2_observed) * np.sqrt(2.0/nA))

print("Observed t-value is = ", t_observed)

p_count = 0.0

for i in range(n_permutations):

    permutted_indices = np.random.permutation(nA+nB)
    A_indices = permutted_indices[:nA]
    B_indices = permutted_indices[nA:(nA+nB)]

    m_A = np.mean(x_All[A_indices])
    m_B = np.mean(x_All[B_indices])

    s2_A = np.var(x_All[A_indices], ddof=1)
    s2_B = np.var(x_All[B_indices], ddof=1)

    sigma2_permutted = (((nA-1)*s2_A) + ((nB-1)*s2_B))/(nA+nB-2)
    t_permutted = (m_A - m_B) / (np.sqrt(sigma2_permutted) * np.sqrt(2.0/nA))

    if np.abs(t_permutted) >= np.abs(t_observed):
        p_count += 1.0

p_value_permutation = (1.0+p_count)/(1.0+n_permutations)
print("Permutation estimated p-value = ", p_value_permutation)
                          

Observed t-value is =  2.5961983095998966
Permutation estimated p-value =  0.01032989670103299


In [14]:
ttest_ind(df_simple_example['x_A'].to_numpy(), df_simple_example['x_B'].to_numpy(), permutations=n_permutations)


  ttest_ind(df_simple_example['x_A'].to_numpy(), df_simple_example['x_B'].to_numpy(), permutations=n_permutations)


TtestResult(statistic=2.5961983095998966, pvalue=0.00988990110098899, df=nan)

In [4]:
mean_comparison = CompareMeans(DescrStatsW(df_simple_example['x_A']), DescrStatsW(df_simple_example['x_B']))

mean_difference_95CI = mean_comparison.tconfint_diff()

mean_difference_95CI

(0.08486864535681571, 0.6211313546431838)

In [5]:
mean_difference_99CI = mean_comparison.tconfint_diff(alpha=0.01)

mean_difference_99CI

(-0.0006375498458349727, 0.7066375498458345)

In [7]:
import warnings

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    print("Sample size required = ", TTestIndPower().solve_power(effect_size=0.5, nobs1=None, alpha=0.05, power=0.8, ratio=1.0, alternative='two-sided'))

Sample size required =  63.76561058785403


In [9]:

print("Power = ", TTestIndPower().power(effect_size=0.5, nobs1=63.76561058785403, alpha=0.05, ratio=1.0, alternative='two-sided'))

Power =  0.8000000024858956


In [10]:
print("Power = ", TTestIndPower().power(effect_size=0.5, nobs1=85, alpha=0.05, ratio=1.0, alternative='two-sided'))

Power =  0.899894079418773
