In [1]:
import numpy as np
import pandas as pd
from pathlib import Path

In [64]:
def copyMat(df: pd.DataFrame, zero:bool = False) -> pd.DataFrame:
    if zero:
        dfnew = pd.DataFrame(np.zeros(shape=df.shape), index=df.index, columns=df.columns)
    else:
        dfnew = df.copy(deep=True)
    return dfnew

In [231]:
from typing import Dict, List, Tuple
import numpy as np
from scipy.stats import mannwhitneyu, norm


def AUC(labels: pd.Series, values: pd.Series) -> Dict[str, float]:
    posii = labels[labels > 0]
    negii = labels[labels <= 0]
    posn = len(posii)
    negn = len(negii)
    posval = values[posii.index]
    negval = values[negii.index]
    if (posn > 0 and negn > 0):
        statistic, pvalue = mannwhitneyu(posval, negval, alternative = "greater")
        conf_int_low, conf_int_high = mannwhitneyu_conf_int(posval, negval)
        res = {
            "low": conf_int_low,
            "high": conf_int_high,
            "auc": (statistic / (posn * negn)),
            "pval": pvalue
        }
    else:
        res = {
        "auc": 0.5,
        "pval": np.nan
        }

    return res


def mannwhitneyu_conf_int(
    x: np.array,
    y: np.array,
    alpha: float = 0.05
) -> Tuple[float, float]:
    lx = len(x)
    ly = len(y)

    N = norm.ppf(1 - alpha/2)

    diffs = sorted([i-j for i in x for j in y])

    # For an approximate 100(1-a)% confidence interval first calculate K:
    k = int(round(lx*ly/2 - (N * (lx*ly*(lx+ly+1)/12)**0.5)))

    # The Kth smallest to the Kth largest of the n x m differences 
    # lx and ly should be > ~20
    CI = (diffs[k], diffs[len(diffs)-k])
    return CI

In [7]:
dataPath = Path("/home/milo/workspace/pyplier/tests/data/crossVal")
data = pd.read_csv(filepath_or_buffer = dataPath / "data.csv", index_col=0)

In [8]:
data

Unnamed: 0,BD8001,BD8002,BD8003,BD8004,BD8005,BD8006,BD8007,BD8008,BD8009,BD8010,...,BD8031,BD8032,BD8033,BD8034,BD8038,BD8041,BD8042,BD8043,BD8044,BD8045
GAS6,-1.505242,0.188428,1.382268,-0.886040,0.218978,-1.659888,2.796534,0.829210,-1.649642,-0.699206,...,0.167190,-0.443537,-0.413695,-0.033962,-0.003753,-0.562015,-1.457108,0.254652,-0.945488,1.285548
MMP14,-1.362254,1.426659,-0.112956,-0.547275,-0.657120,0.623815,0.162686,0.399899,-0.959655,2.260257,...,-0.136098,-0.776283,0.401661,-0.029038,-1.121673,-0.977343,-1.771282,0.948462,0.723387,0.895571
MARCKSL1,-1.559936,1.247540,-2.110556,-0.721440,-0.058441,0.154611,1.487958,-0.045567,1.409842,0.235813,...,-2.010731,1.165882,-0.192181,-0.207469,-1.077898,0.035008,-0.486909,-0.433482,0.683989,2.203569
SPARC,0.465116,-0.849214,0.790658,0.906851,0.163648,0.117905,-1.060804,-1.701798,0.920468,-1.141010,...,-2.644481,-0.004298,0.902712,-0.857343,-0.450150,-0.669798,0.684989,-0.941961,-0.446238,0.891104
CTSD,0.024942,0.206589,-1.452483,-1.102910,0.034775,-0.016322,0.844618,0.483747,-0.650142,1.753523,...,-0.776214,0.643598,-0.739288,-0.768372,-1.777247,0.343815,-0.374175,0.949864,0.449915,2.400907
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TRPM4,-0.324201,-0.774261,-0.128307,-0.540268,1.317625,0.363039,-0.019754,1.232543,-1.814046,1.174763,...,0.255074,0.288240,-1.268223,0.669198,0.361300,-0.930080,0.117951,-0.105594,-0.502940,1.052108
LAIR2,-0.394921,0.851458,-2.219011,1.623721,-2.460171,-0.319912,0.197574,0.627586,-0.413208,0.143300,...,-1.237987,-0.185263,0.607988,0.617346,-0.586569,-1.212232,0.003727,0.496207,-0.900624,0.757522
ZNF135,0.545218,-0.707431,0.174777,-0.063097,-0.423388,0.432197,-0.180568,0.141946,-0.231568,0.471386,...,-0.121949,-0.133662,-0.745435,0.416144,-0.409687,-1.083912,-0.355580,0.583608,1.140988,-0.355580
MARCH3,1.117124,-0.882370,0.078125,0.027025,-0.312435,-0.634321,-0.208948,-0.704559,1.774945,1.256757,...,-0.669562,0.712260,0.291270,0.131922,0.131922,-1.069672,0.535645,-0.186776,-2.450563,0.417511


In [69]:
plierRes_b = pd.read_csv(filepath_or_buffer = dataPath / "plierRes_b.csv", index_col=0)
plierRes_c = pd.read_csv(filepath_or_buffer = dataPath / "plierRes_c.csv", index_col=0)
plierRes_residual = pd.read_csv(filepath_or_buffer = dataPath / "plierRes_residual.csv", index_col=0)
plierRes_u = pd.read_csv(filepath_or_buffer = dataPath / "plierRes_u.csv", index_col=0)
plierRes_z = pd.read_csv(filepath_or_buffer = dataPath / "plierRes_z.csv", index_col=0)

In [10]:
heldoutgenes = pd.read_csv(filepath_or_buffer = dataPath / "heldOutGenes.csv")

In [11]:
heldoutgenes

Unnamed: 0,name,value
0,IRIS_Bcell-Memory_IgG_IgA,SPDYE1
1,IRIS_Bcell-Memory_IgG_IgA,HEY1
2,IRIS_Bcell-Memory_IgG_IgA,FCRL1
3,IRIS_Bcell-Memory_IgG_IgA,AFF3
4,IRIS_Bcell-Memory_IgG_IgA,COCH
...,...,...
8004,PID_PI3KPLCTRKPATHWAY,PIK3R1
8005,PID_PI3KPLCTRKPATHWAY,GRB2
8006,PID_PI3KPLCTRKPATHWAY,SOS1
8007,PID_PI3KPLCTRKPATHWAY,FOXO3


In [39]:
heldOutGenes = {k: g["value"].tolist() for k,g in heldoutgenes.groupby("name")}

In [70]:
plierRes = {
    "B": plierRes_b,
    "C": plierRes_c,
    "U": plierRes_u,
    "Z": plierRes_z,
    "residual": plierRes_residual,
    "heldOutGenes": heldOutGenes
}

In [71]:
priorMat = pd.read_csv(filepath_or_buffer = dataPath / "priorMat.csv", index_col=0)
priorMatcv = pd.read_csv(filepath_or_buffer = dataPath / "priorMatcvt.csv", index_col=0)

In [72]:
out = pd.DataFrame(data=np.empty(shape=(0,4)))

In [73]:
ii = plierRes["U"].loc[:,np.sum(a=plierRes["U"], axis=0) > 0].columns

In [74]:
Uauc = copyMat(df=plierRes["U"], zero=True)
Up = np.ones(shape=plierRes["U"].shape)

In [75]:
ii

Index(['LV1', 'LV2', 'LV4', 'LV5', 'LV6', 'LV7', 'LV8', 'LV9', 'LV10', 'LV11',
       'LV13', 'LV14', 'LV15', 'LV20', 'LV21', 'LV23', 'LV26', 'LV27', 'LV28',
       'LV29'],
      dtype='object')

In [76]:
i = ii[0]

In [77]:
iipath = plierRes["U"].loc[(plierRes["U"].loc[:,i] > 0), i].index

In [142]:
plierRes["U"].loc[(plierRes["U"].loc[:,i] > 0), i].index

Index(['REACTOME_GENERIC_TRANSCRIPTION_PATHWAY'], dtype='object', name='index')

In [78]:
plierRes["U"]

Unnamed: 0_level_0,LV1,LV2,LV3,LV4,LV5,LV6,LV7,LV8,LV9,LV10,...,LV21,LV22,LV23,LV24,LV25,LV26,LV27,LV28,LV29,LV30
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
IRIS_Bcell-Memory_IgG_IgA,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
IRIS_Bcell-Memory_IgM,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
IRIS_Bcell-naive,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
IRIS_CD4Tcell-N0,0.0,0.0,0,0.0,0.0,0.031636,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
IRIS_CD4Tcell-Th1-restimulated12hour,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PID_IL4_2PATHWAY,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
PID_BCR_5PATHWAY,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
PID_TELOMERASEPATHWAY,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0


In [79]:
(len(iipath) > 1)

False

In [143]:
j = iipath[0]

In [81]:
priorMat

Unnamed: 0_level_0,IRIS_Bcell-Memory_IgG_IgA,IRIS_Bcell-Memory_IgM,IRIS_Bcell-naive,IRIS_CD4Tcell-N0,IRIS_CD4Tcell-Th1-restimulated12hour,IRIS_CD4Tcell-Th1-restimulated48hour,IRIS_CD4Tcell-Th2-restimulated12hour,IRIS_CD4Tcell-Th2-restimulated48hour,IRIS_CD8Tcell-N0,IRIS_DendriticCell-Control,...,KEGG_GNRH_SIGNALING_PATHWAY,KEGG_BASAL_TRANSCRIPTION_FACTORS,REACTOME_SYNTHESIS_OF_DNA,KEGG_HEMATOPOIETIC_CELL_LINEAGE,KEGG_T_CELL_RECEPTOR_SIGNALING_PATHWAY,PID_IL4_2PATHWAY,REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR,PID_BCR_5PATHWAY,PID_TELOMERASEPATHWAY,PID_PI3KPLCTRKPATHWAY
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GAS6,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
MMP14,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
MARCKSL1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SPARC,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CTSD,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TRPM4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
LAIR2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZNF135,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
MARCH3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [144]:
priorMat.loc[:,iipath]

index,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY
index,Unnamed: 1_level_1
GAS6,0
MMP14,0
MARCKSL1,0
SPARC,0
CTSD,0
...,...
TRPM4,0
LAIR2,0
ZNF135,0
MARCH3,0


In [116]:
priorMat.loc[:,iipath] == 0

index,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY
index,Unnamed: 1_level_1
GAS6,True
MMP14,True
MARCKSL1,True
SPARC,True
CTSD,True
...,...
TRPM4,True
LAIR2,True
ZNF135,True
MARCH3,True


In [104]:
priorMat.index[(priorMat.loc[:,iipath] == 0).iloc[:,0]]

Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'CDHR1', 'RRP12', 'SEC31B', 'GZMM', 'PLEKHF1', 'TRPM4', 'LAIR2',
       'ZNF135', 'MARCH3', 'SEPT8'],
      dtype='object', name='index', length=5690)

In [107]:
(priorMat.loc[:,j] > 0 & priorMatcv.loc[:,j] == 0).all

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [111]:
priorMat.loc[:,j]

index,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY
index,Unnamed: 1_level_1
GAS6,0
MMP14,0
MARCKSL1,0
SPARC,0
CTSD,0
...,...
TRPM4,0
LAIR2,0
ZNF135,0
MARCH3,0


In [110]:
priorMatcv.loc[:,j]

index,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY
index,Unnamed: 1_level_1
GAS6,0
MMP14,0
MARCKSL1,0
SPARC,0
CTSD,0
...,...
TRPM4,0
LAIR2,0
ZNF135,0
MARCH3,0


In [146]:
priorMat.loc[:,j]

index
GAS6        0
MMP14       0
MARCKSL1    0
SPARC       0
CTSD        0
           ..
TRPM4       0
LAIR2       0
ZNF135      0
MARCH3      0
SEPT8       0
Name: REACTOME_GENERIC_TRANSCRIPTION_PATHWAY, Length: 5964, dtype: int64

In [150]:
priorMat.loc[:,[j]]

index
GAS6        0
MMP14       0
MARCKSL1    0
SPARC       0
CTSD        0
           ..
TRPM4       0
LAIR2       0
ZNF135      0
MARCH3      0
SEPT8       0
Name: REACTOME_GENERIC_TRANSCRIPTION_PATHWAY, Length: 5964, dtype: int64

In [223]:
alfa = pd.concat([priorMat.loc[:,j], priorMatcv.loc[:,j]], axis = 1).apply(lambda x: (x[0] == 0) or ((x[0] > 0) & (x[1] == 0)), axis=1)

In [224]:
alfa.index

Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'CDHR1', 'RRP12', 'SEC31B', 'GZMM', 'PLEKHF1', 'TRPM4', 'LAIR2',
       'ZNF135', 'MARCH3', 'SEPT8'],
      dtype='object', name='index', length=5964)

In [162]:
alfa.values

array([ True,  True,  True, ...,  True,  True,  True])

In [225]:
pd.concat(
    [priorMat.loc[:,j], priorMatcv.loc[:,j]],
    axis = 1
)\
.apply(
    lambda x: \
        True if (x[0] == 0) or ((x[0] > 0) and (x[1] == 0)) \
        else np.NaN, axis=1)\
.dropna()\
.index

Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'CDHR1', 'RRP12', 'SEC31B', 'GZMM', 'PLEKHF1', 'TRPM4', 'LAIR2',
       'ZNF135', 'MARCH3', 'SEPT8'],
      dtype='object', name='index', length=5744)

In [226]:
iiheldout = \
    pd.concat(
        [priorMat.loc[:,j], priorMatcv.loc[:,j]],
        axis = 1
    )\
    .apply(
        lambda x: \
            True if (x[0] == 0) or ((x[0] > 0) and (x[1] == 0)) \
            else np.NaN, axis=1)\
    .dropna()\
    .index

In [227]:
priorMat.loc[iiheldout, j]

index
GAS6        0
MMP14       0
MARCKSL1    0
SPARC       0
CTSD        0
           ..
TRPM4       0
LAIR2       0
ZNF135      0
MARCH3      0
SEPT8       0
Name: REACTOME_GENERIC_TRANSCRIPTION_PATHWAY, Length: 5744, dtype: int64

In [188]:
plierRes["Z"]

Unnamed: 0_level_0,LV1,LV2,LV3,LV4,LV5,LV6,LV7,LV8,LV9,LV10,...,LV21,LV22,LV23,LV24,LV25,LV26,LV27,LV28,LV29,LV30
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GAS6,0.000000,0.000000,0.014795,0.036853,0.157699,0.013139,0.000000,0.310853,0.023559,0.019356,...,0.000000,0.071844,0.000000,0.013445,0.061465,0.075264,0.083397,0.000000,0.000000,0.079906
MMP14,0.059540,0.009316,0.013828,0.043153,0.265555,0.034890,0.000000,0.041440,0.000000,0.000000,...,0.000000,0.068416,0.138661,0.023125,0.042852,0.030643,0.000000,0.078376,0.000000,0.000000
MARCKSL1,0.000000,0.000000,0.000000,0.000000,0.000000,0.069569,0.012696,0.000000,0.000000,0.000000,...,0.000000,0.013136,0.118920,0.043509,0.009146,0.000000,0.018254,0.259054,0.043946,0.128887
SPARC,0.000000,0.000000,0.006282,0.000000,0.000000,0.000000,0.001946,0.000000,0.005708,0.000000,...,0.000000,0.000000,0.062600,0.000000,0.000000,0.000460,0.000000,0.000000,0.032734,0.069462
CTSD,0.000000,0.055735,0.000000,0.000000,0.168421,0.000000,0.000000,0.000000,0.002130,0.000000,...,0.000000,0.000000,0.061749,0.029423,0.000000,0.000000,0.020375,0.211309,0.044222,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TRPM4,0.151033,0.164691,0.016808,0.000000,0.148611,0.101678,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.079306,0.150203,0.000000,0.081279,0.000000,0.000000,0.000000,0.000000
LAIR2,0.015242,0.000000,0.000000,0.036271,0.095136,0.069436,0.000000,0.000000,0.144361,0.000000,...,0.080698,0.000000,0.000000,0.080235,0.000000,0.000000,0.032286,0.109234,0.035893,0.000000
ZNF135,0.002063,0.000000,0.027388,0.034529,0.067029,0.066422,0.000000,0.000662,0.000000,0.000000,...,0.000000,0.000000,0.015193,0.026892,0.000000,0.085010,0.127863,0.038914,0.081620,0.000000
MARCH3,0.041933,0.000000,0.000000,0.000000,0.000000,0.062566,0.007769,0.000000,0.033165,0.097704,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.109461,0.082764,0.013118,0.152130


In [189]:
plierRes["U"]

Unnamed: 0_level_0,LV1,LV2,LV3,LV4,LV5,LV6,LV7,LV8,LV9,LV10,...,LV21,LV22,LV23,LV24,LV25,LV26,LV27,LV28,LV29,LV30
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
IRIS_Bcell-Memory_IgG_IgA,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
IRIS_Bcell-Memory_IgM,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
IRIS_Bcell-naive,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
IRIS_CD4Tcell-N0,0.0,0.0,0,0.0,0.0,0.031636,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
IRIS_CD4Tcell-Th1-restimulated12hour,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PID_IL4_2PATHWAY,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
PID_BCR_5PATHWAY,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0
PID_TELOMERASEPATHWAY,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0


In [128]:
plierRes["Z"].columns = plierRes["U"].columns

In [190]:
plierRes["Z"].loc[iiheldout, i]

index
GAS6        0.000000
MMP14       0.059540
MARCKSL1    0.000000
SPARC       0.000000
CTSD        0.000000
              ...   
TRPM4       0.151033
LAIR2       0.015242
ZNF135      0.002063
MARCH3      0.041933
SEPT8       0.000000
Name: LV1, Length: 5744, dtype: float64

In [220]:
alfa = priorMat.loc[iiheldout, j]
%store alfa

Stored 'alfa' (Series)


In [221]:
beta = plierRes["Z"].loc[iiheldout, i]
%store beta

Stored 'beta' (Series)


In [228]:
aucres = AUC(priorMat.loc[iiheldout, j], plierRes["Z"].loc[iiheldout, i])

In [229]:
aucres

{'low': 0.0,
 'high': 0.0189180614565336,
 'auc': 0.5952222873136758,
 'pval': 0.003939670634429234}

In [196]:
labels = priorMat.loc[iiheldout, j]
values = plierRes["Z"].loc[iiheldout, i]

In [200]:
posii = labels[labels > 0]
posii

index
NR3C2      1
PPM1A      1
SP1        1
NR1D1      1
ZNF3       1
ZNF589     1
ZNF155     1
ZNF439     1
ZKSCAN1    1
ZNF655     1
RORC       1
ZNF555     1
NCOA1      1
ZNF493     1
ZNF570     1
NR6A1      1
ZNF510     1
ZNF235     1
ZNF230     1
RUNX2      1
MED31      1
ZNF254     1
ZIK1       1
MED17      1
MAML3      1
TEAD2      1
ESRRA      1
ZNF333     1
ZNF101     1
CCNC       1
NCOR1      1
ZNF317     1
MYC        1
ZNF649     1
ZFP37      1
ZNF440     1
ZNF442     1
ZNF600     1
ZNF607     1
MED16      1
ZNF480     1
JUNB       1
RARA       1
ZNF569     1
ZNF563     1
ZNF169     1
ZNF222     1
NR1H2      1
MED24      1
PARP1      1
NR3C1      1
ZNF12      1
ZNF263     1
TGIF1      1
Name: REACTOME_GENERIC_TRANSCRIPTION_PATHWAY, dtype: int64

In [201]:
negii = labels[labels <= 0]
negii

index
GAS6        0
MMP14       0
MARCKSL1    0
SPARC       0
CTSD        0
           ..
TRPM4       0
LAIR2       0
ZNF135      0
MARCH3      0
SEPT8       0
Name: REACTOME_GENERIC_TRANSCRIPTION_PATHWAY, Length: 5690, dtype: int64

In [202]:
posn = len(posii)
posn

54

In [203]:
negn = len(negii)
negn

5690

In [206]:
posval = values[posii.index]
posval

index
NR3C2      0.146526
PPM1A      0.021578
SP1        0.207947
NR1D1      0.000000
ZNF3       0.000000
ZNF589     0.000000
ZNF155     0.139871
ZNF439     0.000000
ZKSCAN1    0.226709
ZNF655     0.082738
RORC       0.026113
ZNF555     0.123513
NCOA1      0.159137
ZNF493     0.203500
ZNF570     0.111064
NR6A1      0.045465
ZNF510     0.111278
ZNF235     0.000000
ZNF230     0.000000
RUNX2      0.055897
MED31      0.000000
ZNF254     0.123818
ZIK1       0.054093
MED17      0.089398
MAML3      0.212591
TEAD2      0.000000
ESRRA      0.000000
ZNF333     0.000000
ZNF101     0.000000
CCNC       0.018918
NCOR1      0.195019
ZNF317     0.018019
MYC        0.005178
ZNF649     0.019188
ZFP37      0.039350
ZNF440     0.047262
ZNF442     0.000000
ZNF600     0.000000
ZNF607     0.023685
MED16      0.000000
ZNF480     0.075957
JUNB       0.000000
RARA       0.000000
ZNF569     0.111133
ZNF563     0.006369
ZNF169     0.117842
ZNF222     0.000000
NR1H2      0.000000
MED24      0.000000
PARP1      0.0

In [207]:
negval = values[negii.index]
negval

index
GAS6        0.000000
MMP14       0.059540
MARCKSL1    0.000000
SPARC       0.000000
CTSD        0.000000
              ...   
TRPM4       0.151033
LAIR2       0.015242
ZNF135      0.002063
MARCH3      0.041933
SEPT8       0.000000
Name: LV1, Length: 5690, dtype: float64

In [211]:
statistic, pvalue = mannwhitneyu(posval, negval, alternative = "greater")

In [212]:
conf_int_low, conf_int_high = mannwhitneyu_conf_int(posval, negval)

In [213]:
res = {
    "low": conf_int_low,
    "high": conf_int_high,
    "auc": (statistic / (posn * negn)),
    "pval": pvalue
}

In [214]:
res

{'low': 0.0,
 'high': 0.0189180614565336,
 'auc': 0.5952222873136758,
 'pval': 0.003939670634429234}

In [232]:
aucres = AUC(priorMat.loc[iiheldout, j], plierRes["Z"].loc[iiheldout, i])

In [233]:
aucres

{'low': 0.0,
 'high': 0.0189180614565336,
 'auc': 0.5952222873136758,
 'pval': 0.003939670634429234}

In [250]:
pd.DataFrame({0: [j], 1: [i], 2: [aucres["auc"]], 3: aucres["pval"]})

Unnamed: 0,0,1,2,3
0,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY,LV1,0.595222,0.00394


In [251]:
out.append(pd.DataFrame({0: [j], 1: [i], 2: [aucres["auc"]], 3: aucres["pval"]}))

Unnamed: 0,0,1,2,3
0,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY,LV1,0.595222,0.00394


In [None]:
out = rbind(out, c(colnames(priorMat)[j], i, aucres$auc, aucres$pval))
Uauc.loc[j, i] = aucres["auc"]
Up.loc[j, i] = aucres["pval"]

In [None]:
if (len(iipath) > 1):
    for (j in iipath):

        iiheldout = \
                    pd.concat(
                        [priorMat.loc[:,j], priorMatcv.loc[:,j]],
                        axis = 1
                    )\
                    .apply(
                        lambda x: \
                            True if (x[0] == 0) or ((x[0] > 0) and (x[1] == 0)) \
                            else np.nan, axis=1)\
                    .dropna()\
                    .index

        aucres = AUC(priorMat.loc[iiheldout, j], plierRes["Z"].loc[iiheldout, i])

        out = out.append(
                other = pd.DataFrame(
                    {
                        "pathway": [j],
                        "LV index": [i],
                        "AUC": [aucres["auc"]],
                        "p-value": [aucres["pval"]]
                    }
                )
            )
            Uauc.loc[j, i] = aucres["auc"]
            Up.loc[j, i] = aucres["pval"]

else:
    j = iipath
    iiheldout = which((rowsums(matrix(priorMat.loc[, iipath], ncol = 1)) == 0) | (priorMat[, j] > 0 & priorMatcv[, j] == 0))
    aucres = AUC(priorMat[iiheldout, j], plierRes$Z[iiheldout, i])
    out = rbind(out, c(colnames(priorMat)[j], i, aucres$auc, aucres$pval))
    Uauc[j, i] = aucres$auc
    Up[j, i] = aucres$pval


In [None]:

out.loc[:,"fdr"] = multipletests(out["pval"], method = "fdr_bh"))
return {"Uauc": Uauc, "Upval": Up, "summary": out}