In [1]:
import math
import scipy.stats as st

### Test Hypothesis Based on Two Sample

In [23]:
def two_sample_hypothesis(delta,mean1, mean2, m , n, sd1, sd2):
    z_value = (mean1-mean2 - delta)/math.sqrt(sd1**2/m + sd2**2/n)
    p_value = st.norm.cdf(z_value)
    return z_value,p_value


In [59]:
z_value, p_value = two_sample_hypothesis(delta = 0, mean1 = 18.17 , mean2 = 16.82 , m = 42, n=30,sd1= 1.6, sd2 = 1.3)
print('z_value: ', z_value, 'p_value: ', p_value)

z_value:  3.9419508948604243 p_value:  0.9999595892370591


### beta and Choice of Sample Size

In [60]:
def two_sample_larger_beta(delta, new_delta, m , n, sd1, sd2,alpha):
    return st.norm.cdf(st.norm.ppf(1-alpha) - (new_delta - delta)/(math.sqrt(sd1**2/m+sd2**2/n)))
def two_sample_smaller_beta(delta, new_delta, m , n, sd1, sd2,alpha):
    return 1- st.norm.cdf(-st.norm.ppf(1-alpha) - (new_delta - delta)/(math.sqrt(sd1**2/m+sd2**2/n)))
def two_sample_larger_beta(delta, new_delta, m , n, sd1, sd2,alpha):
    return st.norm.cdf(st.norm.ppf(1-alpha/2) - (new_delta - delta)/(math.sqrt(sd1**2/m+sd2**2/n))) - (st.norm.cdf(-st.norm.ppf(1-alpha/2) - (new_delta - delta)/(math.sqrt(sd1**2/m+sd2**2/n))))

In [61]:
two_sample_larger_beta(delta=0, new_delta= 1, m = 42, n=30,sd1= 1.6, sd2 = 1.3,alpha= 0.01)

0.3653726165797801

In [57]:
two_sample_smaller_beta(delta = -1, new_delta = -1.3,  m = 8, n=8,sd1= 0.1, sd2 = 0.5, alpha = 0.01)

0.746093622766712

In [62]:
alpha = 0.01
sd1 = 1.6
sd2 = 1.3
m = 42
n = 30
delta = 0
new_delta = 1
st.norm.cdf(st.norm.ppf(1-alpha) - ((new_delta - delta)/math.sqrt(sd1**2/m + sd2**2/n)))

0.27638457157488855

In [20]:
# Sample size:
def sample_N_for_two_sample(sd1, sd2, alpha, beta, delta, new_delta):
    return ((sd1**2+ sd2**2)*(st.norm.ppf(1-alpha)+st.norm.ppf(1-beta))**2) / (new_delta - delta)**2

In [58]:
sample_N_for_two_sample(sd1 = 0.1, sd2 = 0.5, alpha = 0.01, beta = 0.1, delta = 0, new_delta=1 )

37.604488613353816

### Confidence Interval for two sample

In [36]:
def CI_for_two_sample(mean1, mean2, sd1, sd2, m, n, confidence):
    A = (mean1 - mean2)
    B = (abs(st.norm.ppf((1-confidence)/2)))*(math.sqrt(sd1**2 / m + sd2**2/n))
    return  (A-B, A+B)

In [63]:
CI_for_two_sample(mean1 =  115.2, mean2 =129.5, sd1 =5.01, sd2 = 5.32, m = 6,n = 6, confidence=.95 )

(-20.14727151003095, -8.452728489969044)

### The two-sample t test and CI

In [2]:
def T_value_for_two_sample(mean1, mean2, m, n, sd1, sd2, delta):
    T = (mean1-mean2-delta)/(math.sqrt(sd1**2/m + sd2**2/n))
    DOF = math.ceil((sd1**2/m + sd2**2/n)**2/(( (sd1**2/m)**2/ (m-1) )+ ( (sd2**2/n)**2/ (m-1) )))
    return T, DOF

In [6]:
T, DOF = T_value_for_two_sample(mean1=81, mean2=2.05 , m=10, n=10, sd1 = 0.2, sd2= 0.4, delta =-1)
print("T-value is: ", T)
print("DOF is ", DOF)
p_value = st.t.sf(T,df=DOF)
print("P_value is: ", 1-p_value)

T-value is:  -2.8991378028648422
DOF is  14
P_value is:  0.005830951925841377


In [54]:
T, DOF = T_value_for_two_sample(mean1=5.6 , mean2=3.9  , m=32, n=32, sd1 = 3.2, sd2= 2.6, delta =1)
print("T-value is: ", T)
print("DOF is ", DOF)
p_value = st.t.cdf(T,df=DOF)
print("P_value is: ", 1-p_value)


T-value is:  0.960392076798049
DOF is  60
P_value is:  0.17035615655952885


In [68]:
# CI: Need to calculate df first:
_, DOF = T_value_for_two_sample(mean1= 115.2, mean2=129.5 , m=6, n=6, sd1 =5.01, sd2=5.32, delta =0)
print("Degree of freedom is: ", DOF)
def CI_for_t_value(mean1, mean2, t_value,  m, n,sd1, sd2):
    A = (mean1 - mean2)
    B = t_value*math.sqrt(sd1**2/m + sd2**2/n)
    return (A-B, A+B)
CI_for_t_value(mean1=115.2, mean2=129.5, t_value = 2.228, m=6, n=6, sd1 =5.01, sd2=5.32)


Degree of freedom is:  10


(-20.946918528661733, -7.653081471338263)

In [27]:
import numpy as np
A = np.asarray([1929,2546,2825, 1921, 1628,2175,2113,2621,1843,2543])
B = np.asarray([2129,2885,2895,1945,1750,2181,2164,2626, 2006,2625])
D = B-A
D

array([200, 339,  70,  24, 122,   6,  51,   5, 163,  82])

In [30]:
# paired T test
def paired_t_test(d, delta_0,sd, n):
    t = (d - delta_0)/(sd / math.sqrt(n))
    return t, st.t.cdf(t, df = n-1)



In [31]:
t, p = paired_t_test(np.mean(D), 25,np.std(D), len(D) )
1-p

0.014657545839519504