In [2]:
import numpy as np
import numpy.random as npr
import pandas as pd
from datetime import date
import os

In [25]:
def bootstrap(invec):
    """Return a bootstrap resample of ``invec`` as a list.

    ``len(invec)`` positions are drawn uniformly at random, with replacement,
    through ``npr.randint``; the elements found at those positions are
    collected in draw order, so the result has the same length as the input.
    """
    size = len(invec)
    positions = npr.randint(0, size, size)
    resample = []
    for pos in positions:
        resample.append(invec[pos])
    return resample

def estimatePi0(p, numBoot=100, numLambda=100, maxLambda=0.95):
    """Estimate pi0, the proportion of true null hypotheses, from p-values.

    For every lambda on an evenly spaced grid ending at ``maxLambda`` the
    estimate ``#{p >= lambda} / (n * (1 - lambda))`` is computed.  The lambda
    whose estimate has the smallest summed squared deviation from the minimum
    grid estimate, accumulated over ``numBoot`` bootstrap resamples of the
    p-values, is selected and its estimate on the original data is returned.

    Parameters
    ----------
    p : sequence of float
        The p-values.  The input is not modified; a sorted copy is used.
    numBoot : int
        Number of bootstrap resamples.  With 0 the first grid point is used.
    numLambda : int
        Number of grid points for lambda.
    maxLambda : float
        Largest lambda on the grid; the smallest is ``maxLambda / numLambda``.

    Returns
    -------
    numpy.float64
        The selected pi0 estimate.  It is not clipped and may exceed 1.

    Raises
    ------
    ValueError
        If ``p`` is empty.
    """
    # Work on a sorted copy so the caller's sequence is left untouched.
    sorted_p = np.sort(np.asarray(p, dtype=float))
    n = sorted_p.size
    if n == 0:
        raise ValueError("estimatePi0 needs at least one p-value")
    lambdas = np.linspace(maxLambda / numLambda, maxLambda, numLambda)

    def pi0_curve(values):
        # searchsorted(side="left") counts the entries strictly below each
        # lambda, so n minus that is the number of p-values >= lambda.
        # Unlike argmax on a boolean mask, this yields 0 (not n) when no
        # p-value reaches lambda.
        exceed = n - np.searchsorted(values, lambdas, side="left")
        return exceed / (n * (1 - lambdas))

    pi0s = pi0_curve(sorted_p)
    minPi0 = np.min(pi0s)
    mse = np.zeros(numLambda)
    for _ in range(numBoot):
        pBoot = np.sort(np.asarray(bootstrap(sorted_p), dtype=float))
        mse += np.square(pi0_curve(pBoot) - minPi0)
    return pi0s[np.argmin(mse)]

# Legacy version (commented out, kept for reference): its input is a list of tuples of p-values and analyte names, e.g. (p, coord)
#def qvalues(pvalues):
#   m=len(pvalues)
#    pvalues.sort()
#    pi0 = estimatePi0([p for p,coord in pvalues])
#    num_p, qs = 0.0, []
#    for p,coord in pvalues:
#        num_p += 1.0
#        q = pi0*p*m/num_p
#        qs.append((q,p,coord))
#    qs.reverse()
#    old_q=1.0
#    for ix in range(len(qs)):
#        q = min(old_q,qs[ix][0])
#        old_q = q
#        qs[ix] = (q,qs[ix][1],qs[ix][2])
#    qs.reverse()
#    return qs

def qvalues(pvalues):
    """Convert the p-values in the first column of ``pvalues`` to q-values.

    The p-values are ranked in ascending order, scaled to
    ``pi0 * p * m / rank`` (``pi0`` from ``estimatePi0``, ``m`` the number of
    rows), and then made monotone by taking, from the largest p-value
    downwards, the running minimum starting at 1.0.

    Parameters
    ----------
    pvalues : pandas.DataFrame
        Frame whose first column holds the p-values; its index labels are
        carried over to the result.  The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One float column ``'q'`` indexed by the labels of ``pvalues``, with
        rows ordered from the largest p-value to the smallest.  Consumers
        should align on the index rather than on row position.

    Raises
    ------
    AssertionError
        If ``pvalues`` has no rows.
    """
    pcolname = pvalues.columns[0]
    m = pvalues.shape[0]
    assert m > 0, "qvalues requires at least one p-value"

    # Sort into a new frame instead of in place so the caller's data is kept.
    ordered = pvalues.sort_values(by=pcolname)
    p_sorted = np.asarray(ordered.iloc[:, 0], dtype=float)
    pi0 = estimatePi0(p_sorted.tolist())

    # Rank-scaled FDR estimate for each p-value, smallest p first.
    ranks = np.arange(1, m + 1)
    fdr = pi0 * p_sorted * m / ranks

    # Walk from the largest p-value down, never letting q rise above the
    # value seen so far (starting from the cap of 1.0).
    fdr_desc = fdr[::-1]
    q_desc = np.minimum.accumulate(np.minimum(fdr_desc, 1.0))
    return pd.DataFrame({'q': q_desc}, index=ordered.index[::-1])

In [26]:
# Load the p-values stored in today's experiment directory, indexed by TF.
p_vals = pd.read_csv('../exp/'+str(date.today())+'/p_vals.csv', index_col='TF')
display(p_vals)

q_vals = pd.DataFrame(index=p_vals.index, columns=p_vals.columns)

for C in p_vals.columns:
    # Give qvalues() its own single-column copy so any sorting it does can
    # never reach back into p_vals.
    subset = p_vals[[C]].copy()
    q = qvalues(subset)
    # qvalues() returns a frame with a single 'q' column whose rows are not
    # in p_vals order; assigning the Series lets pandas align on the TF label
    # instead of relying on column names or row position.
    q_vals[C] = q['q'].astype(float)
display(q_vals)

Unnamed: 0_level_0,C(organ),C(dev_stage),C(organ):C(dev_stage)
TF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Acaa2,0.153573,0.843688,0.234084
Acss2,0.008522,0.677961,0.181684
Actb,0.002562,0.325240,0.213794
Adnp,0.002137,0.686789,0.238376
Aebp2,0.000649,0.641234,0.221934
...,...,...,...
Zic3,0.000199,0.592143,0.225363
Zkscan1,0.003943,0.687029,0.242536
Zmiz1,0.013469,0.747832,0.229980
Zmynd8,0.023532,0.764870,0.245106


Unnamed: 0_level_0,C(organ),C(dev_stage),C(organ):C(dev_stage)
TF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Acaa2,0.00156571,0.0505604,0.00529989
Acss2,0.000112462,0.0496242,0.00529989
Actb,4.48768e-05,0.0496242,0.00529989
Adnp,3.88629e-05,0.0496242,0.00529989
Aebp2,1.76487e-05,0.0496242,0.00529989
...,...,...,...
Zic3,9.4953e-06,0.0496242,0.00529989
Zkscan1,6.15828e-05,0.0496242,0.00529989
Zmiz1,0.000164308,0.0496242,0.00529989
Zmynd8,0.000265133,0.0496242,0.00529989


In [27]:
#low_q_vals = q_vals.where(q_vals.loc[:,'C(organ):C(dev_stage)']<0.0046).dropna()
#low_q_vals

In [28]:
# Create today's experiment directory (and any missing parents) up front,
# then write once.  This replaces a try/except fallback that referenced an
# undefined name and only handled FileNotFoundError.
out_dir = '../exp/'+str(date.today())
os.makedirs(out_dir, exist_ok=True)
q_vals.to_csv(out_dir+'/q_vals.csv')