In [2]:
import numpy as np
import numpy.random as npr
import pandas as pd
from datetime import date
import os

In [3]:
def bootstrap(invec):
    """Return a bootstrap resample of ``invec`` as a Python list.

    ``len(invec)`` positions are drawn uniformly at random, with replacement,
    using ``npr.randint``; the element at each drawn position is collected in
    draw order.

    Parameters
    ----------
    invec : indexable sequence supporting ``len()`` and integer indexing.

    Returns
    -------
    list
        A list with the same length as ``invec``.
    """
    size = len(invec)
    # One random position per output element; positions may repeat.
    picks = npr.randint(0, size, size)
    resampled = []
    for pick in picks:
        resampled.append(invec[pick])
    return resampled

def estimatePi0(p, numBoot=100, numLambda=100, maxLambda=0.95):
    """Estimate pi0 from a collection of p-values using a bootstrap-selected lambda.

    For each lambda on an evenly spaced grid ending at ``maxLambda``, pi0 is
    estimated as ``#{p >= lambda} / (n * (1 - lambda))``.  The lambda whose
    estimate has the smallest summed squared deviation from the minimum
    estimate, accumulated over ``numBoot`` bootstrap resamples, is selected and
    its (non-bootstrapped) estimate is returned.

    Parameters
    ----------
    p : array-like of float
        The p-values.  The input is not modified; a sorted copy is used.
    numBoot : int
        Number of bootstrap resamples.
    numLambda : int
        Number of lambda grid points.
    maxLambda : float
        Largest lambda on the grid.

    Returns
    -------
    float
        The pi0 estimate at the selected lambda.  The value is not clipped,
        so it can exceed 1.

    Raises
    ------
    ValueError
        If ``p`` is empty.
    """
    # Work on a sorted copy so the caller's sequence is left untouched.
    pSorted = np.sort(np.asarray(p, dtype=float))
    n = len(pSorted)
    if n == 0:
        raise ValueError("estimatePi0 requires at least one p-value")

    lambdas = np.linspace(maxLambda / numLambda, maxLambda, numLambda)
    denominators = n * (1 - lambdas)

    def _pi0Curve(sortedP):
        # searchsorted(..., side='left') is the index of the first value
        # >= lambda, and equals n when no such value exists.  That makes the
        # count of p >= lambda correct (0) in the "none" case, where an
        # argmax over an all-False mask would wrongly report index 0.
        firstAtOrAbove = np.searchsorted(sortedP, lambdas, side='left')
        return (n - firstAtOrAbove) / denominators

    pi0s = _pi0Curve(pSorted)
    minPi0 = np.min(pi0s)

    mse = np.zeros(numLambda)
    for _ in range(numBoot):
        # Resample with replacement (same draw as bootstrap(): randint(0, n, n)).
        pBoot = np.sort(pSorted[npr.randint(0, n, n)])
        mse = mse + np.square(_pi0Curve(pBoot) - minPi0)

    minIx = np.argmin(mse)
    return pi0s[minIx]

# Legacy implementation, kept for reference (superseded by the DataFrame-based qvalues below).
# Its input is a list of tuples of p-values and analyte names, e.g. (p, coord).
#def qvalues(pvalues):
#   m=len(pvalues)
#    pvalues.sort()
#    pi0 = estimatePi0([p for p,coord in pvalues])
#    num_p, qs = 0.0, []
#    for p,coord in pvalues:
#        num_p += 1.0
#        q = pi0*p*m/num_p
#        qs.append((q,p,coord))
#    qs.reverse()
#    old_q=1.0
#    for ix in range(len(qs)):
#        q = min(old_q,qs[ix][0])
#        old_q = q
#        qs[ix] = (q,qs[ix][1],qs[ix][2])
#    qs.reverse()
#    return qs

def qvalues(pvalues):
    """Convert a one-column DataFrame of p-values into q-values.

    The p-values are taken from the first column of ``pvalues``.  After sorting
    them in ascending order, the raw value for the p-value at rank ``r`` is
    ``pi0 * p * m / r`` where ``m`` is the number of rows and ``pi0`` comes
    from ``estimatePi0``.  The raw values are then made monotone by taking a
    running minimum from the largest p-value downwards, starting from 1.0, so
    no returned q-value exceeds 1.

    Parameters
    ----------
    pvalues : pandas.DataFrame
        Frame whose first column holds the p-values.  Must have at least one
        row.

    Returns
    -------
    pandas.DataFrame
        A frame with a single float column ``'q'`` carrying the index labels
        of ``pvalues``.  Rows are ordered from the largest p-value to the
        smallest.
    """
    pcolname = pvalues.columns[0]

    m = float(len(pvalues))
    assert m > 0, "qvalues requires at least one p-value"

    ordered = pvalues.sort_values(by=pcolname)
    pSorted = ordered.iloc[:, 0].to_numpy(dtype=float)
    pi0 = estimatePi0(pSorted.tolist())

    # Rank-scaled raw q-values, computed for all rows at once instead of
    # growing a DataFrame label by label (which also collapsed rows that
    # share an index label).
    ranks = np.arange(1, len(pSorted) + 1, dtype=float)
    rawQ = pi0 * pSorted * m / ranks

    # Running minimum from the largest p-value down, capped at 1.0.
    # np.fmin skips NaN the way the builtin min(old_q, q) did, so a NaN raw
    # value does not poison the q-values that follow it.
    qDescending = np.fmin(np.fmin.accumulate(rawQ[::-1]), 1.0)

    return pd.DataFrame({'q': qDescending}, index=ordered.index[::-1])

In [6]:
# Read the p-value table from the directory named after today's date,
# using the 'TF' column as the row index.
p_vals = pd.read_csv('../exp/'+str(date.today())+'/p_vals.csv', index_col='TF')
display(p_vals)

# One q-value column per p-value column, same row labels as p_vals.
q_vals = pd.DataFrame(index=p_vals.index, columns=p_vals.columns)

for C in p_vals.columns:
    subset = pd.DataFrame(p_vals.loc[:,C])
    q = qvalues(subset)
    # qvalues returns a frame whose only column is 'q'; assign that Series so
    # pandas aligns it to q_vals by row label rather than by column name.
    q_vals[C] = q['q'].astype(float)

Unnamed: 0.1,Unnamed: 0,C(organ),C(dev_stage),C(organ):C(dev_stage)
0,Acaa2,0.153573,0.843688,0.234084


In [None]:
# Keep rows whose 'C(organ):C(dev_stage)' q-value is below 0.005; rows failing
# the condition become all-NaN under where() and are removed by dropna().
# The column is selected by label, so .loc is required (.iloc is positional).
low_q_vals = q_vals.where(q_vals.loc[:,'C(organ):C(dev_stage)']<0.005).dropna()
