In [3]:
import numpy as np
import time
from statsmodels.stats.multitest import multipletests

In [4]:
def get_ci_95(data, nboot=100000, random_state=None):
    """
    Bootstrap the mean of `data` and return it with a 95% percentile interval.

    Each bootstrap replicate draws ``len(data)`` values from `data` with
    replacement and records their mean; the interval bounds are the 2.5th and
    97.5th percentiles of those replicate means.

    Parameters
    ----------
    data : 1-D array-like
        Observations to resample.
    nboot : int, optional
        Number of bootstrap replicates (default 100000, the value that was
        previously hard-coded).
    random_state : int or None, optional
        Seed for a private generator. When None (default) the draws come from
        NumPy's global random state, so an earlier ``np.random.seed(...)``
        call still controls the result.

    Returns
    -------
    list
        ``[mean, lower_bound, upper_bound]`` where ``mean`` is the mean of the
        original data (not of the replicates). Empty input yields three NaNs.
    """
    data = np.asarray(data)
    n_obs = len(data)
    if n_obs == 0:
        # Resampling nothing can only produce NaN means; return that result
        # directly instead of looping nboot times and emitting warnings.
        nan = float("nan")
        return [nan, nan, nan]

    # Private generator only when a seed is requested, so unseeded calls keep
    # consuming the global stream exactly as before.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    boot_means = np.empty(nboot)
    for i in range(nboot):
        resample = rng.choice(data, size=n_obs)  # sampling with replacement
        boot_means[i] = np.mean(resample)

    lower_bound, upper_bound = np.percentile(boot_means, [2.5, 97.5])
    return [np.mean(data), lower_bound, upper_bound]



def permutation_test(data1, data2, random_state=None, nsteps=100000, keep_vals=False):
    """
    2-tailed Permutation test: test if the difference between two groups is significant.

    The two samples are pooled and shuffled `nsteps` times; after each shuffle
    the pool is split back into groups of the original sizes and the absolute
    difference of the group means is recorded. The p-value is the fraction of
    shuffles whose difference is at least as large as the observed one, so it
    is exactly 0.0 when no shuffle reaches the observed difference.

    Parameters
    ----------
    data1, data2 : 1-D array-like
        The two samples to compare. They are not modified.
    random_state : int or None, optional
        Seed for a private generator. When None (default) the shuffles draw
        from NumPy's global random state without reseeding it, so an earlier
        ``np.random.seed(...)`` call still controls the result.
    nsteps : int, optional
        Number of random permutations (default 100000).
    keep_vals : bool, optional
        If True, also return the array of permuted differences under ``"k"``.

    Returns
    -------
    dict
        ``{"diff": observed_abs_difference, "p_value": p}`` plus ``"k"`` when
        `keep_vals` is True.
    """
    len1 = len(data1)

    # A private RandomState seeded with `random_state` yields the same shuffle
    # sequence as seeding the global generator would, but without clobbering
    # global state. With no seed, use the global generator as-is rather than
    # reseeding it from OS entropy.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    diff = np.abs(np.mean(data1) - np.mean(data2))  # observed difference
    pooled = np.concatenate([data1, data2])  # fresh array, shuffled in place below

    k = np.zeros(nsteps)
    for i in range(nsteps):
        rng.shuffle(pooled)
        k[i] = np.abs(np.mean(pooled[:len1]) - np.mean(pooled[len1:]))

    # Fraction of permuted differences at least as extreme as the observed one.
    p_value = np.count_nonzero(k >= diff) / nsteps

    result = {"diff": diff, "p_value": p_value}
    if keep_vals:
        result = {"diff": diff, "k": k, "p_value": p_value}
    return result



In [8]:
# Per-pair analysis: for every pair, report the bootstrap mean/CI of each
# state and a permutation-test p-value of states 0..N_STATES-1 against the
# reference state. All p-values are collected in `pvalues` in loop order.

SEED = 0                               # fixed so the stochastic results are reproducible
DATA_DIR = 'MNXL/MNXL_by_pair_states'  # one .npy file per (pair, state)
N_STATES = 5                           # states 0..4 are compared against the reference
REF_STATE = 5                          # state every other state is tested against
N_PERMUTATIONS = 100000

# Seed the global generator so the bootstrap resampling in get_ci_95 is repeatable.
np.random.seed(SEED)

pvalues = []
xl_pairs = ["S7_Y16", "K10_K196", "Y16_K196", "K76_T86", "K76_T161", "K76_K196", "K76_S208", "T77_S208", "S81_T86", "S81_K96", "K96_K196", "K96_S208", "T161_K204", "K196_S198", "K196_K204", "K196_S208", "K196_K271", "K196_S276", "S198_S208", "T201_S208"]
for pair in xl_pairs:
    # perf_counter is monotonic, so the elapsed time cannot be skewed by clock adjustments.
    start_time = time.perf_counter()
    ref_data = np.load(f'{DATA_DIR}/{pair}_state_{REF_STATE}.npy')

    for st in range(N_STATES):
        dat = np.load(f'{DATA_DIR}/{pair}_state_{st}.npy')
        print(f'{pair} state {st}')
        result = get_ci_95(dat)
        print(f"{result[0]:.9f}\t[{result[1]:.9f}, {result[2]:.9f}]")
        # pvalue: each test gets its own seed (offset by the number of tests
        # already run) so runs are repeatable without every test reusing the
        # same permutation sequence.
        ptest_res = permutation_test(
            dat, ref_data, random_state=SEED + len(pvalues), nsteps=N_PERMUTATIONS
        )
        print(ptest_res)
        pvalues.append(ptest_res['p_value'])

    # Mean and CI for the reference state:
    print(f'{pair} state {REF_STATE}')
    res_st_5 = get_ci_95(ref_data)
    print(f"{res_st_5[0]:.9f}\t[{res_st_5[1]:.9f}, {res_st_5[2]:.9f}]")

    end_time = time.perf_counter()
    print("Execution time:", end_time - start_time, "seconds")
    print("**************************************")

S7_Y16 state 0
0.009446357	[0.007628339, 0.011385053]
{'diff': 0.009445088985282946, 'p_value': 0.0}
S7_Y16 state 1
0.000339578	[0.000091380, 0.000674024]
{'diff': 0.00033831050272264696, 'p_value': 0.0}
S7_Y16 state 2
0.000052612	[0.000000000, 0.000157837]
{'diff': 5.134467801320083e-05, 'p_value': 0.23511}
S7_Y16 state 3
0.001683644	[0.001203788, 0.002209678]
{'diff': 0.0016823766921760853, 'p_value': 0.0}
S7_Y16 state 4
0.000000000	[0.000000000, 0.000000000]
{'diff': 1.2676900748424733e-06, 'p_value': 1.0}
S7_Y16 state 5
0.000001268	[0.000000000, 0.000003803]
Execution time: 79.01981472969055 seconds
**************************************
K10_K196 state 0
0.007987586	[0.006338364, 0.009709180]
{'diff': 0.005519599426677853, 'p_value': 0.0}
K10_K196 state 1
0.000679901	[0.000297779, 0.001128867]
{'diff': 0.001788085803751394, 'p_value': 0.00103}
K10_K196 state 2
0.001730601	[0.001467752, 0.002008331]
{'diff': 0.0007373859359705321, 'p_value': 0.0005}
K10_K196 state 3
0.000859553	[0.0

In [9]:
# Benjamini-Hochberg FDR correction across every collected permutation p-value.
adjusted_pvalues = multipletests(
    pvalues,
    alpha=0.05,
    method='fdr_bh',
    is_sorted=False,
    returnsorted=False,
)

# Print raw and corrected p-values side by side; element 1 of the result is
# used as the corrected values, aligned with `pvalues`.
count = 0
for count, (p, corrected_p) in enumerate(zip(pvalues, adjusted_pvalues[1]), start=1):
    print(f"{p:6.5f} {corrected_p:6.5f}")
    # Blank line after every fifth row to visually group the output.
    if not count % 5:
        print()
        
    

0.00000 0.00000
0.00000 0.00000
0.23511 0.50023
0.00000 0.00000
1.00000 1.00000

0.00000 0.00000
0.00103 0.00468
0.00050 0.00238
0.00000 0.00000
0.21800 0.49545

0.09611 0.25292
0.28212 0.58775
0.00000 0.00000
0.00000 0.00000
0.00891 0.03246

0.23018 0.50023
0.38639 0.71100
0.00000 0.00000
0.20622 0.48467
0.79880 1.00000

1.00000 1.00000
1.00000 1.00000
0.01291 0.04194
0.00000 0.00000
1.00000 1.00000

0.22903 0.50023
0.39105 0.71100
0.00909 0.03246
0.00000 0.00000
0.79596 1.00000

1.00000 1.00000
1.00000 1.00000
1.00000 1.00000
0.01214 0.04186
1.00000 1.00000

1.00000 1.00000
1.00000 1.00000
1.00000 1.00000
1.00000 1.00000
1.00000 1.00000

0.72609 1.00000
0.43221 0.77180
0.00000 0.00000
0.12484 0.32010
0.91724 1.00000

1.00000 1.00000
1.00000 1.00000
1.00000 1.00000
1.00000 1.00000
1.00000 1.00000

1.00000 1.00000
1.00000 1.00000
0.82151 1.00000
0.00000 0.00000
1.00000 1.00000

1.00000 1.00000
1.00000 1.00000
0.81978 1.00000
0.00000 0.00000
1.00000 1.00000

0.00436 0.01744
0.01300 0.04