In [13]:
import numpy as np
import numpy.random as npr
import pandas as pd
from datetime import date
import os

In [14]:
def bootstrap(invec):
    """Return a bootstrap resample of ``invec`` as a list.

    ``len(invec)`` positions are drawn uniformly at random, with
    replacement, and the elements found at those positions are collected
    in draw order. The input itself is not modified.
    """
    size = len(invec)
    # One random position per output element, each in [0, size).
    picks = npr.randint(0, size, size)
    resampled = []
    for pos in picks:
        resampled.append(invec[pos])
    return resampled

#Estimate pi0 for q-value calculation
def estimatePi0(p, numBoot=100, numLambda=100, maxLambda=0.95):
    """Estimate pi0 (the proportion of true null hypotheses) from p-values.

    For every lambda on an evenly spaced grid of ``numLambda`` points ending
    at ``maxLambda`` the estimate ``#{p >= lambda} / (n * (1 - lambda))`` is
    computed. The lambda that is finally used is the one whose bootstrap
    estimates have the smallest summed squared deviation from the minimum
    of the full-sample estimates.

    Parameters
    ----------
    p : sequence of float
        The p-values. The sequence is copied; the caller's object is not
        reordered.
    numBoot : int
        Number of bootstrap resamples used to score each lambda.
    numLambda : int
        Number of grid points for lambda.
    maxLambda : float
        Largest lambda on the grid.

    Returns
    -------
    float
        The selected pi0 estimate, capped at 1.0 because pi0 is a proportion.

    Raises
    ------
    ValueError
        If ``p`` is empty.
    """
    # Work on a sorted copy so the caller's sequence keeps its order.
    pSorted = np.sort(np.asarray(p, dtype=float))
    n = pSorted.size
    if n == 0:
        raise ValueError("estimatePi0 needs at least one p-value")

    lambdas = np.linspace(maxLambda / numLambda, maxLambda, numLambda)
    scale = n * (1 - lambdas)

    def pi0Curve(sortedP):
        # searchsorted(side='left') is the number of values strictly below
        # each lambda, so n minus it is #{p >= lambda}. Unlike an argmax
        # over a boolean mask this is 0 (not n) when no value reaches lambda.
        return (n - np.searchsorted(sortedP, lambdas, side='left')) / scale

    pi0s = pi0Curve(pSorted)
    minPi0 = np.min(pi0s)

    mse = np.zeros(numLambda)
    for _ in range(numBoot):
        pBoot = np.sort(bootstrap(pSorted))
        mse += np.square(pi0Curve(pBoot) - minPi0)

    # argmin returns the first (smallest-lambda) minimiser on ties.
    return float(min(pi0s[np.argmin(mse)], 1.0))

# Legacy list-based implementation, kept for reference. Its input is a list of (p-value, analyte name) tuples, e.g. (p, coord)
#def qvalues(pvalues):
#   m=len(pvalues)
#    pvalues.sort()
#    pi0 = estimatePi0([p for p,coord in pvalues])
#    num_p, qs = 0.0, []
#    for p,coord in pvalues:
#        num_p += 1.0
#        q = pi0*p*m/num_p
#        qs.append((q,p,coord))
#    qs.reverse()
#    old_q=1.0
#    for ix in range(len(qs)):
#        q = min(old_q,qs[ix][0])
#        old_q = q
#        qs[ix] = (q,qs[ix][1],qs[ix][2])
#    qs.reverse()
#    return qs

#Calculate q-values
def qvalues(pvalues, pi0=None):
    """Convert a one-column frame of p-values into q-values.

    Parameters
    ----------
    pvalues : pandas.DataFrame
        Frame whose first column holds the p-values; the index labels the
        tested items. The frame is not modified.
    pi0 : float, optional
        Proportion of true nulls to use. When omitted it is estimated from
        the p-values with ``estimatePi0``.

    Returns
    -------
    pandas.DataFrame
        A single float column ``'q'`` indexed by the labels of ``pvalues``,
        ordered from the largest p-value to the smallest.

    Raises
    ------
    AssertionError
        If ``pvalues`` has no rows.
    """
    pcolname = pvalues.columns[0]

    m = pvalues.shape[0]
    assert m > 0, "qvalues needs at least one p-value"

    # sort_values without inplace returns a new frame, so the caller's
    # row order is left alone.
    ordered = pvalues.sort_values(by=pcolname)
    p_sorted = np.asarray(ordered.iloc[:, 0], dtype=float)

    if pi0 is None:
        pi0 = estimatePi0(p_sorted.tolist())

    # FDR estimate at each p-value threshold: pi0 * p * m / rank.
    fdr = pi0 * p_sorted * m / np.arange(1, m + 1)

    # Walk from the largest p-value down, keeping the running minimum so
    # q-values are monotone in p. Seeding the scan with 1.0 caps q at 1;
    # fmin skips NaN entries instead of propagating them.
    running = np.fmin.accumulate(np.concatenate(([1.0], fdr[::-1])))[1:]

    return pd.DataFrame({'q': running}, index=ordered.index[::-1])

#Save files from performed experiments
def save_exp(data, filename):
    """Write ``data`` as a tab-separated file in today's experiment folder.

    The file is saved as ``../exp/<today's date>/<filename>.tsv`` relative
    to the current working directory. The folder, including a missing
    ``../exp`` parent, is created when it does not exist yet.

    Parameters
    ----------
    data : object with a ``to_csv`` method (e.g. a pandas DataFrame)
        The table to save.
    filename : str
        File name without the ``.tsv`` extension.
    """
    # Resolve the date once so the folder and the file path always agree.
    out_dir = os.path.join('..', 'exp', str(date.today()))
    # exist_ok avoids the check-then-create race and also builds '../exp'.
    os.makedirs(out_dir, exist_ok=True)
    data.to_csv(os.path.join(out_dir, filename + '.tsv'), sep='\t')

In [22]:
# Load the p-value table from today's experiment folder, indexed by 'TF'.
# NOTE(review): the path depends on date.today(), so this only finds a file
# saved on the same calendar day (presumably by save_exp) - confirm.
p_vals = pd.read_csv('../exp/'+str(date.today())+'/p_vals.tsv', sep='\t', index_col='TF')

q_vals = pd.DataFrame(index=p_vals.index, columns=p_vals.columns, dtype=float)

# q-values are computed independently for each column of p-values.
for C in p_vals.columns:
    # Explicit copy so qvalues never touches p_vals itself.
    subset = p_vals[[C]].copy()
    q = qvalues(subset)
    # Assign the 'q' Series, not the whole frame: a Series is aligned on the
    # TF index, whereas a frame whose only column is named 'q' does not
    # match column C and is filled with NaN or rejected by pandas.
    q_vals[C] = q['q'].astype(float)

In [19]:
# Order the rows by ascending value of the 'C(organ):C(dev_stage)' column.
q_vals = q_vals.sort_values(by='C(organ):C(dev_stage)')

In [None]:
# Save the q-value table; the frame is named q_vals (qvals was undefined).
save_exp(q_vals, 'q_vals')