In [25]:
from typing import Dict
from collections.abc import Iterable

import numpy as np
import pandas as pd
from statsmodels.stats.multitest import multipletests
from tqdm.auto import tqdm

from pyplier.AUC import AUC
from pyplier.copyMat import copyMat
from pyplier.PLIERRes import PLIERResults

In [26]:
%store -r out
%store -r priorMat
%store -r priorMatCV

In [27]:
plierRes = out
priorMat = priorMat
priorMatcv = priorMatCV

In [28]:
out

B : 30 rows x 36 columns
Z : 5892 rows x 30 columns
U : 606 rows x 30 columns
C : 5892 rows x 606 columns
heldOutGenes: 606
withPrior: 0
Uauc: 0 rows x 0 columns
Up: 0 rows x 0 columns
summary: 0 rows x 0 columns
residual: 5892 rows x 36 columns
L1 is set to 18.1606
L2 is set to 36.3212
L3 is set to 0.0004

In [21]:
out.U.to_csv("debug_u_20220417.csv.gz")

In [22]:
out.Z.to_csv("debug_z_20220417.csv.gz")

In [23]:
priorMatcv.to_csv("debug_priormatcv_20220417.csv.gz")
priorMat.to_csv("debug_priormat_20220417.csv.gz")

In [29]:
out = pd.DataFrame(
    data=np.empty(shape=(0, 4)), columns=["pathway", "LV index", "AUC", "p-value"]
)
out_dict = dict()
ii = plierRes.U.loc[:, plierRes.U.sum(axis=0) > 0].columns
Uauc = pd.DataFrame(
    np.zeros(shape=plierRes.U.shape),
    index=plierRes.U.index,
    columns=plierRes.U.columns,
)
Up = pd.DataFrame(
    np.ones(shape=plierRes.U.shape),
    index=plierRes.U.index,
    columns=plierRes.U.columns,
)

In [5]:
for i in tqdm(ii):
    iipath = plierRes.U.loc[(plierRes.U.loc[:, i] > 0), i].index
    if len(iipath) > 1:
        for j in tqdm(iipath):
            iiheldout = (
                pd.concat([priorMat.loc[:, j], priorMatcv.loc[:, j]], axis=1)
                .apply(
                    lambda x: True
                    if (x[0] == 0) or ((x[0] > 0) and (x[1] == 0))
                    else np.nan,  # use np.nan instead of False so that we can drop entries in the chain
                    axis=1,
                )
                .dropna()
                .index
            )
            aucres = AUC(priorMat.loc[iiheldout, j], plierRes.Z.loc[iiheldout, i])
            out_dict[j] = {
                "pathway": j,
                "LV index": i,
                "AUC": aucres["auc"],
                "p-value": aucres["pval"],
            }
            Uauc.loc[j, i] = aucres["auc"]
            Up.loc[j, i] = aucres["pval"]

    else:
        j = iipath[0]
        iiheldout = (
            pd.concat([priorMat.loc[:, j], priorMatcv.loc[:, j]], axis=1)
            .apply(
                lambda x: True
                if (x[0] == 0) or ((x[0] > 0) and (x[1] == 0))
                else np.nan,
                axis=1,
            )
            .dropna()
            .index
        )

        aucres = AUC(priorMat.loc[iiheldout, j], plierRes.Z.loc[iiheldout, i])
        if isinstance(j, Iterable) and not isinstance(j, str):
            for _ in j:
                out_dict[_] = {
                    "pathway": _,
                    "LV index": i,
                    "AUC": aucres["auc"],
                    "p-value": aucres["pval"],
                }
                Uauc.loc[_, i] = aucres["auc"]
                Up.loc[_, i] = aucres["pval"]
        elif isinstance(j, str):
            out_dict[j] = {
                "pathway": j,
                "LV index": i,
                "AUC": aucres["auc"],
                "p-value": aucres["pval"],
            }
            Uauc.loc[j, i] = aucres["auc"]
            Up.loc[j, i] = aucres["pval"]

Index(['LV1', 'LV2', 'LV3', 'LV4', 'LV6', 'LV7', 'LV8', 'LV9', 'LV10', 'LV14',
       'LV15', 'LV16', 'LV20', 'LV23', 'LV24', 'LV27', 'LV28', 'LV29', 'LV30'],
      dtype='object')

In [30]:
i = ii[0]

In [31]:
iipath = plierRes.U.loc[(plierRes.U.loc[:, i] > 0), i].index

In [32]:
iipath

Index(['REACTOME_GENERIC_TRANSCRIPTION_PATHWAY', 'REACTOME_IMMUNE_SYSTEM',
       'REACTOME_TRANSMEMBRANE_TRANSPORT_OF_SMALL_MOLECULES'],
      dtype='object')

In [33]:
j = iipath[0]

In [34]:
iiheldout = (
    pd.concat([priorMat.loc[:, j], priorMatcv.loc[:, j]], axis=1)
    .apply(
        lambda x: True
        if (x[0] == 0) or ((x[0] > 0) and (x[1] == 0))
        else np.nan,  # use np.nan instead of False so that we can drop entries in the chain
        axis=1,
    )
    .dropna()
    .index
)

In [35]:
iiheldout

Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2', 'CFL1', 'SELL',
       'GNGT2', 'SERPINH1'],
      dtype='object', name='gene', length=5672)

In [64]:
c = priorMat.loc[:, iipath].sum(axis=1).where(lambda x: x == 0).dropna().index
c

Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1', 'TGM2',
       ...
       'LDHD', 'GMPR2', 'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2',
       'CFL1', 'SERPINH1'],
      dtype='object', name='gene', length=4656)

In [57]:
a = priorMat.loc[:, j].where(lambda x: x > 0).dropna().index
a

Index(['RXRA', 'NR1H3', 'SMARCD3', 'NR3C2', 'MED25', 'ZNF528', 'ZNF552',
       'E2F5', 'NRBF2', 'CDKN2B',
       ...
       'ZNF266', 'ZNF267', 'ZNF263', 'ZNF547', 'ZNF546', 'ZNF548', 'TGIF2',
       'TGIF1', 'ZNF287', 'ZNF282'],
      dtype='object', name='gene', length=274)

In [58]:
b = priorMatcv.loc[:, j].where(lambda x: x == 0).dropna().index
b

Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2', 'CFL1', 'SELL',
       'GNGT2', 'SERPINH1'],
      dtype='object', name='gene', length=5672)

In [60]:
a.intersection(b)

Index(['NRBF2', 'NOTCH4', 'ZNF175', 'E2F4', 'SMAD7', 'ZNF582', 'CDK9',
       'ZNF202', 'ZNF205', 'ZNF416', 'ZFP28', 'ZNF583', 'ZKSCAN5', 'ZKSCAN4',
       'ZNF615', 'NR1I3', 'ZNF558', 'NCOA2', 'NCOA3', 'ZNF180', 'ZNF230',
       'CCNT1', 'MED31', 'MAML2', 'TEAD2', 'ZNF33B', 'ZNF101', 'CCNC', 'NCOR2',
       'ZNF37A', 'ZNF426', 'THRB', 'ZNF649', 'ZNF398', 'ZFP37', 'ZNF337',
       'ZNF484', 'ZNF485', 'PPARD', 'ZNF544', 'RARA', 'RARG', 'ZNF559',
       'ZNF561', 'ZNF222', 'ZNF226', 'ZNF227', 'ZNF436', 'NR4A2', 'ZNF12',
       'ZNF267', 'ZNF263', 'ZNF547', 'TGIF1'],
      dtype='object', name='gene')

In [61]:
len(a.intersection(b))

54

In [66]:
d = c.union(a.intersection(b))
d

Index(['A2M', 'AANAT', 'AARS', 'AARS2', 'AASDH', 'AASDHPPT', 'AATK', 'ABAT',
       'ABCA1', 'ABCA5',
       ...
       'ZNF615', 'ZNF618', 'ZNF649', 'ZNF703', 'ZNFX1', 'ZNRD1', 'ZW10',
       'ZWILCH', 'ZWINT', 'ZYX'],
      dtype='object', name='gene', length=4710)

In [67]:
c = priorMat.loc[:, iipath].sum(axis=1).where(lambda x: x == 0).dropna().index
a = priorMat.loc[:, j].where(lambda x: x > 0).dropna().index
b = priorMatcv.loc[:, j].where(lambda x: x == 0).dropna().index
iiheldout = c.union(a.intersection(b))

In [68]:
aucres = AUC(priorMat.loc[iiheldout, j], plierRes.Z.loc[iiheldout, i])

In [13]:
out_dict[j] = {
    "pathway": j,
    "LV index": i,
    "AUC": aucres["auc"],
    "p-value": aucres["pval"],
}

In [14]:
Uauc.loc[j, i] = aucres["auc"]

In [69]:
aucres["auc"]

0.5391549732722413

In [70]:
aucres

{'low': -7.104548354439511e-05,
 'high': inf,
 'auc': 0.5391549732722413,
 'pval': 0.13507923396016042}

In [75]:
i = ii[4]

In [76]:
iipath = plierRes.U.loc[(plierRes.U.loc[:, i] > 0), i].index

In [77]:
iipath

Index(['REACTOME_GENERIC_TRANSCRIPTION_PATHWAY'], dtype='object')

In [81]:
j = iipath[0]

In [79]:
priorMat.loc[:, iipath].sum(axis=1).where(lambda x: x == 0).dropna().index

Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2', 'CFL1', 'SELL',
       'GNGT2', 'SERPINH1'],
      dtype='object', name='gene', length=5618)

In [80]:
priorMat.loc[:, iipath].where(lambda x: x == 0).dropna().index

Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2', 'CFL1', 'SELL',
       'GNGT2', 'SERPINH1'],
      dtype='object', name='gene', length=5618)

In [83]:
j

'REACTOME_GENERIC_TRANSCRIPTION_PATHWAY'

In [84]:
priorMat.loc[:, j].where(lambda x: x > 0).dropna().index

Index(['RXRA', 'NR1H3', 'SMARCD3', 'NR3C2', 'MED25', 'ZNF528', 'ZNF552',
       'E2F5', 'NRBF2', 'CDKN2B',
       ...
       'ZNF266', 'ZNF267', 'ZNF263', 'ZNF547', 'ZNF546', 'ZNF548', 'TGIF2',
       'TGIF1', 'ZNF287', 'ZNF282'],
      dtype='object', name='gene', length=274)

In [15]:
Up.loc[j, i] = aucres["pval"]

In [85]:
a = priorMat.loc[:, iipath].where(lambda x: x == 0).dropna().index
b = priorMat.loc[:, j].where(lambda x: x > 0).dropna().index
c = priorMatcv.loc[:, j].where(lambda x: x == 0).dropna().index
iiheldout = a.union(b.intersection(c))

In [86]:
aucres = AUC(priorMat.loc[iiheldout, j], plierRes.Z.loc[iiheldout, i])

In [87]:
aucres

{'low': 1.5898721390235826e-05,
 'high': inf,
 'auc': 0.5731082631225031,
 'pval': 0.02109867841667011}

In [None]:
if len(iipath) > 1:
    for j in tqdm(iipath):
        c = priorMat.loc[:, iipath].sum(axis=1).where(lambda x: x == 0).dropna().index
        a = priorMat.loc[:, j].where(lambda x: x > 0).dropna().index
        b = priorMatcv.loc[:, j].where(lambda x: x == 0).dropna().index
        iiheldout = c.union(a.intersection(b))
        aucres = AUC(priorMat.loc[iiheldout, j], plierRes.Z.loc[iiheldout, i])
        out_dict[j] = {
            "pathway": j,
            "LV index": i,
            "AUC": aucres["auc"],
            "p-value": aucres["pval"],
        }
        Uauc.loc[j, i] = aucres["auc"]
        Up.loc[j, i] = aucres["pval"]

In [48]:
iipaths = {i: plierRes.U.loc[(plierRes.U.loc[:, i] > 0), i].index for i in ii}

In [44]:
if isinstance(j, Iterable) and not isinstance(j, str):
    for _ in j:
        out_dict[_] = {
            "pathway": _,
            "LV index": i,
            "AUC": aucres["auc"],
            "p-value": aucres["pval"],
        }
        Uauc.loc[_, i] = aucres["auc"]
        Up.loc[_, i] = aucres["pval"]
elif isinstance(j, str):
    out_dict[j] = {
        "pathway": j,
        "LV index": i,
        "AUC": aucres["auc"],
        "p-value": aucres["pval"],
    }
    Uauc.loc[j, i] = aucres["auc"]
    Up.loc[j, i] = aucres["pval"]

In [45]:
out_dict

{'REACTOME_GENERIC_TRANSCRIPTION_PATHWAY': {'pathway': 'REACTOME_GENERIC_TRANSCRIPTION_PATHWAY',
  'LV index': 'LV30',
  'AUC': 0.4435280777395409,
  'p-value': 0.9342239015236435}}

In [50]:
for i in tqdm(ii):
    iipath = plierRes.U.loc[(plierRes.U.loc[:, i] > 0), i].index
    if len(iipath) > 1:
        for j in tqdm(iipath):
            iiheldout = (
                pd.concat([priorMat.loc[:, j], priorMatcv.loc[:, j]], axis=1)
                .apply(
                    lambda x: True
                    if (x[0] == 0) or ((x[0] > 0) and (x[1] == 0))
                    else np.nan,  # use np.nan instead of False so that we can drop entries in the chain
                    axis=1,
                )
                .dropna()
                .index
            )
            aucres = AUC(priorMat.loc[iiheldout, j], plierRes.Z.loc[iiheldout, i])
            out_dict[j] = {
                "pathway": j,
                "LV index": i,
                "AUC": aucres["auc"],
                "p-value": aucres["pval"],
            }
            Uauc.loc[j, i] = aucres["auc"]
            Up.loc[j, i] = aucres["pval"]

    else:
        j = iipath[0]
        iiheldout = (
            pd.concat([priorMat.loc[:, j], priorMatcv.loc[:, j]], axis=1)
            .apply(
                lambda x: True
                if (x[0] == 0) or ((x[0] > 0) and (x[1] == 0))
                else np.nan,
                axis=1,
            )
            .dropna()
            .index
        )

        aucres = AUC(priorMat.loc[iiheldout, j], plierRes.Z.loc[iiheldout, i])
        if isinstance(j, Iterable) and not isinstance(j, str):
            for _ in j:
                out_dict[_] = {
                    "pathway": _,
                    "LV index": i,
                    "AUC": aucres["auc"],
                    "p-value": aucres["pval"],
                }
                Uauc.loc[_, i] = aucres["auc"]
                Up.loc[_, i] = aucres["pval"]
        elif isinstance(j, str):
            out_dict[j] = {
                "pathway": j,
                "LV index": i,
                "AUC": aucres["auc"],
                "p-value": aucres["pval"],
            }
            Uauc.loc[j, i] = aucres["auc"]
            Up.loc[j, i] = aucres["pval"]

  0%|          | 0/19 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

j: REACTOME_GENERIC_TRANSCRIPTION_PATHWAY
i: LV6
iiheldout: Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2', 'CFL1', 'SELL',
       'GNGT2', 'SERPINH1'],
      dtype='object', name='gene', length=5672)


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

j: REACTOME_GENERIC_TRANSCRIPTION_PATHWAY
i: LV9
iiheldout: Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2', 'CFL1', 'SELL',
       'GNGT2', 'SERPINH1'],
      dtype='object', name='gene', length=5672)


  0%|          | 0/4 [00:00<?, ?it/s]

j: REACTOME_METABOLISM_OF_LIPIDS_AND_LIPOPROTEINS
i: LV14
iiheldout: Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2', 'CFL1', 'SELL',
       'GNGT2', 'SERPINH1'],
      dtype='object', name='gene', length=5620)
j: REACTOME_ADAPTIVE_IMMUNE_SYSTEM
i: LV15
iiheldout: Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1', 'TGM2',
       ...
       'GMPR2', 'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2', 'CFL1',
       'GNGT2', 'SERPINH1'],
      dtype='object', name='gene', length=5520)
j: REACTOME_ADAPTIVE_IMMUNE_SYSTEM
i: LV16
iiheldout: Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1', 'TGM2',
       ...
       'GMPR2', 'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2', 'CFL1',
       'GNGT2', 'SERPINH1'],
      dtype='object', name='gene', length=5520)


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

j: REACTOME_GENERIC_TRANSCRIPTION_PATHWAY
i: LV28
iiheldout: Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2', 'CFL1', 'SELL',
       'GNGT2', 'SERPINH1'],
      dtype='object', name='gene', length=5672)


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

In [51]:
out_dict

{'REACTOME_GENERIC_TRANSCRIPTION_PATHWAY': {'pathway': 'REACTOME_GENERIC_TRANSCRIPTION_PATHWAY',
  'LV index': 'LV29',
  'AUC': 0.5652664056010442,
  'p-value': 0.03678715458552349},
 'REACTOME_IMMUNE_SYSTEM': {'pathway': 'REACTOME_IMMUNE_SYSTEM',
  'LV index': 'LV27',
  'AUC': 0.5816224843267968,
  'p-value': 6.838012251357796e-05},
 'REACTOME_TRANSMEMBRANE_TRANSPORT_OF_SMALL_MOLECULES': {'pathway': 'REACTOME_TRANSMEMBRANE_TRANSPORT_OF_SMALL_MOLECULES',
  'LV index': 'LV1',
  'AUC': 0.5353698460449478,
  'p-value': 0.17616694200113858},
 'REACTOME_HEMOSTASIS': {'pathway': 'REACTOME_HEMOSTASIS',
  'LV index': 'LV24',
  'AUC': 0.617047189295977,
  'p-value': 0.00024058911737042017},
 'KEGG_MAPK_SIGNALING_PATHWAY': {'pathway': 'KEGG_MAPK_SIGNALING_PATHWAY',
  'LV index': 'LV10',
  'AUC': 0.569941561498906,
  'p-value': 0.05062362195664588},
 'MIPS_SPLICEOSOME': {'pathway': 'MIPS_SPLICEOSOME',
  'LV index': 'LV23',
  'AUC': 0.7600280495885846,
  'p-value': 2.3322137003185835e-07},
 'REACT

In [52]:
out = pd.DataFrame.from_dict(out_dict, orient="index")

In [53]:
out

Unnamed: 0,pathway,LV index,AUC,p-value
REACTOME_GENERIC_TRANSCRIPTION_PATHWAY,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY,LV29,0.565266,0.03678715
REACTOME_IMMUNE_SYSTEM,REACTOME_IMMUNE_SYSTEM,LV27,0.581622,6.838012e-05
REACTOME_TRANSMEMBRANE_TRANSPORT_OF_SMALL_MOLECULES,REACTOME_TRANSMEMBRANE_TRANSPORT_OF_SMALL_MOLE...,LV1,0.53537,0.1761669
REACTOME_HEMOSTASIS,REACTOME_HEMOSTASIS,LV24,0.617047,0.0002405891
KEGG_MAPK_SIGNALING_PATHWAY,KEGG_MAPK_SIGNALING_PATHWAY,LV10,0.569942,0.05062362
MIPS_SPLICEOSOME,MIPS_SPLICEOSOME,LV23,0.760028,2.332214e-07
REACTOME_PLATELET_ACTIVATION_SIGNALING_AND_AGGREGATION,REACTOME_PLATELET_ACTIVATION_SIGNALING_AND_AGG...,LV24,0.620513,0.007683274
REACTOME_SIGNALLING_BY_NGF,REACTOME_SIGNALLING_BY_NGF,LV4,0.675721,3.206191e-05
REACTOME_CELL_CYCLE_MITOTIC,REACTOME_CELL_CYCLE_MITOTIC,LV23,0.577069,0.01581702
REACTOME_MITOTIC_G1_G1_S_PHASES,REACTOME_MITOTIC_G1_G1_S_PHASES,LV8,0.789125,5.360498e-08


In [56]:
Uauc

Unnamed: 0,LV1,LV2,LV3,LV4,LV5,LV6,LV7,LV8,LV9,LV10,...,LV21,LV22,LV23,LV24,LV25,LV26,LV27,LV28,LV29,LV30
IRIS_Bcell-Memory_IgG_IgA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IRIS_Bcell-Memory_IgM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IRIS_Bcell-naive,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IRIS_CD4Tcell-N0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IRIS_CD4Tcell-Th1-restimulated12hour,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PID_IL4_2PATHWAY,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PID_BCR_5PATHWAY,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PID_TELOMERASEPATHWAY,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [54]:
_, fdr, *_ = multipletests(out.loc[:, "p-value"], method="fdr_bh")
out.loc[:, "FDR"] = fdr
return_dict = {"Uauc": Uauc, "Upval": Up, "summary": out}

In [55]:
out

Unnamed: 0,pathway,LV index,AUC,p-value,FDR
REACTOME_GENERIC_TRANSCRIPTION_PATHWAY,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY,LV29,0.565266,0.03678715,0.04483434
REACTOME_IMMUNE_SYSTEM,REACTOME_IMMUNE_SYSTEM,LV27,0.581622,6.838012e-05,0.0001403592
REACTOME_TRANSMEMBRANE_TRANSPORT_OF_SMALL_MOLECULES,REACTOME_TRANSMEMBRANE_TRANSPORT_OF_SMALL_MOLE...,LV1,0.53537,0.1761669,0.1963003
REACTOME_HEMOSTASIS,REACTOME_HEMOSTASIS,LV24,0.617047,0.0002405891,0.0004264989
KEGG_MAPK_SIGNALING_PATHWAY,KEGG_MAPK_SIGNALING_PATHWAY,LV10,0.569942,0.05062362,0.05982792
MIPS_SPLICEOSOME,MIPS_SPLICEOSOME,LV23,0.760028,2.332214e-07,6.996641e-07
REACTOME_PLATELET_ACTIVATION_SIGNALING_AND_AGGREGATION,REACTOME_PLATELET_ACTIVATION_SIGNALING_AND_AGG...,LV24,0.620513,0.007683274,0.009988256
REACTOME_SIGNALLING_BY_NGF,REACTOME_SIGNALLING_BY_NGF,LV4,0.675721,3.206191e-05,7.355379e-05
REACTOME_CELL_CYCLE_MITOTIC,REACTOME_CELL_CYCLE_MITOTIC,LV23,0.577069,0.01581702,0.01989883
REACTOME_MITOTIC_G1_G1_S_PHASES,REACTOME_MITOTIC_G1_G1_S_PHASES,LV8,0.789125,5.360498e-08,1.90054e-07


In [22]:
iiheldout

Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2', 'CFL1', 'SELL',
       'GNGT2', 'SERPINH1'],
      dtype='object', name='gene', length=5738)

In [23]:
plierRes.Z.loc[iiheldout, i]

gene
GAS6        0.115022
MMP14       0.072330
MARCKSL1    0.065167
SPARC       0.023501
CTSD        0.188790
              ...   
CFL2        0.000000
CFL1        0.038926
SELL        0.000000
GNGT2       0.000000
SERPINH1    0.123516
Name: LV12, Length: 5738, dtype: float64

In [24]:
labels = priorMat.loc[iiheldout, j]

In [25]:
values = plierRes.Z.loc[iiheldout, i]

In [14]:
posii = labels[labels > 0]
negii = labels[labels <= 0]
posn = len(posii)
negn = len(negii)
posval = values[posii.index]
negval = values[negii.index]

In [15]:
posval

gene
GAS6        0.000000
MMP14       0.074708
MARCKSL1    0.000000
SPARC       0.000000
CTSD        0.000000
              ...   
CFL2        0.087434
CFL1        0.000000
SELL        0.036669
GNGT2       0.010840
SERPINH1    0.000000
Name: LV1, Length: 5843, dtype: float64

In [26]:
iiheldout

Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2', 'CFL1', 'SELL',
       'GNGT2', 'SERPINH1'],
      dtype='object', name='gene', length=5738)

In [27]:
j

Index(['KEGG_MAPK_SIGNALING_PATHWAY'], dtype='object')

In [28]:
iipath

Index(['KEGG_MAPK_SIGNALING_PATHWAY'], dtype='object')

In [29]:
len(iipath) > 1

False

In [36]:
j.values

array(['KEGG_MAPK_SIGNALING_PATHWAY'], dtype=object)

In [39]:
priorMat.loc[iiheldout, j[0]]

gene
GAS6        0
MMP14       0
MARCKSL1    0
SPARC       0
CTSD        0
           ..
CFL2        0
CFL1        0
SELL        0
GNGT2       0
SERPINH1    0
Name: KEGG_MAPK_SIGNALING_PATHWAY, Length: 5738, dtype: int64

In [31]:
values

gene
GAS6        0.115022
MMP14       0.072330
MARCKSL1    0.065167
SPARC       0.023501
CTSD        0.188790
              ...   
CFL2        0.000000
CFL1        0.038926
SELL        0.000000
GNGT2       0.000000
SERPINH1    0.123516
Name: LV12, Length: 5738, dtype: float64