In [1]:
from pathlib import Path
from typing import Optional

import numpy as np
import pandas as pd

In [2]:
# Fixture directory: "tests/data" under the parent of the current working directory.
dataPath = Path.cwd().parent / "tests" / "data"


def _read_common(stem):
    """Load ``common/plierRes_<stem>.csv`` from ``dataPath`` with its first column as the index."""
    csv_path = dataPath / "common" / f"plierRes_{stem}.csv"
    return pd.read_csv(filepath_or_buffer=csv_path, index_col=0)


# One frame per exported CSV; the variable names mirror the file stems.
plierRes_b = _read_common("b")
plierRes_c = _read_common("c")
plierRes_residual = _read_common("residual")
plierRes_u = _read_common("u")
plierRes_z = _read_common("z")
heldOutGenes = _read_common("heldoutgenes")

plierRes_uauc = _read_common("uauc")
plierRes_up = _read_common("up")
plierRes_summary = _read_common("summary")

priorMat = _read_common("priormat")
priorMatCV = _read_common("priormatcv")

# Scalar values stored alongside the matrices under the keys "L1", "L2", "L3".
# NOTE(review): presumably the fitted PLIER penalty parameters - confirm against the export script.
L1, L2, L3 = 18.43058, 36.86117, 0.0004307425

# Maps "LV<k>" -> k for the listed subset of latent variables.
withPrior = {
    f"LV{k}": k
    for k in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 18, 20, 23, 24, 26, 27, 29, 30)
}

# Bundle everything into one dict; "heldOutGenes" becomes {name: [value, ...]}
# built from the long-format table's "name" and "value" columns.
plierRes = dict(
    B=plierRes_b,
    C=plierRes_c,
    residual=plierRes_residual,
    U=plierRes_u,
    Z=plierRes_z,
    Uauc=plierRes_uauc,
    Up=plierRes_up,
    summary=plierRes_summary,
    withPrior=withPrior,
    heldOutGenes={
        group: values.tolist()
        for group, values in heldOutGenes.groupby("name")["value"]
    },
    L1=L1,
    L2=L2,
    L3=L3,
)

In [3]:
# Tunable inputs for the LV-naming steps below.
# top: number of pathway names kept per latent-variable label.
top: int = 1
# fdr_cutoff: rows of the summary table with FDR below this value set the p-value threshold.
fdr_cutoff: float = 0.01
# use: which score matrix to rank on, "coef" or "AUC"; None falls back to "coef" in the next cell.
use: Optional[str] = None

In [4]:
# Default to coefficient-based ranking when nothing was chosen, and reject
# any value other than the two supported options.
if use is None:
    use = "coef"
elif use not in ("coef", "AUC"):
    raise ValueError(f"'use' must be either 'coef' or 'AUC'; got {use!r}")

In [5]:
# Labels are collected here by the naming loop further down.
names = []

# Work on a deep copy of the chosen score matrix so the masking below never
# touches the frames stored in plierRes.
Uuse = plierRes["U" if use == "coef" else "Uauc"].copy(deep=True)

# .get() so a result dict without an "Up" entry is handled like Up=None.
p_values = plierRes.get("Up")
if p_values is not None:
    summary = plierRes["summary"]
    significant = summary.loc[summary["FDR"] < fdr_cutoff, "p-value"]
    # Threshold = largest p-value among rows passing the FDR cutoff. When no
    # row passes, built-in max() would raise on the empty selection; -inf
    # instead marks every entry as non-significant.
    pval_cutoff = float(significant.max()) if len(significant) else float("-inf")
    # Zero every score whose p-value is above the threshold.
    Uuse[p_values > pval_cutoff] = 0
else:
    # Keep the name bound so later cells that display it do not hit a NameError.
    pval_cutoff = None
    print("No p-values in PLIER object: using coefficients only")

In [6]:
# Show the p-value threshold derived from the FDR cutoff.
pval_cutoff

0.005872014916454

In [7]:
# Inspect the p-value table that is compared against pval_cutoff to mask Uuse.
plierRes["Up"]

Unnamed: 0,LV1,LV2,LV3,LV4,LV5,LV6,LV7,LV8,LV9,LV10,...,LV21,LV22,LV23,LV24,LV25,LV26,LV27,LV28,LV29,LV30
IRIS_Bcell-Memory_IgG_IgA,1.0,1.0,1.0,1.0,1.0,1.000000,1.0,1.0,1.0,1.0,...,1,1,1.0,1.0,1,1.0,1.0,1,1.0,1.0
IRIS_Bcell-Memory_IgM,1.0,1.0,1.0,1.0,1.0,1.000000,1.0,1.0,1.0,1.0,...,1,1,1.0,1.0,1,1.0,1.0,1,1.0,1.0
IRIS_Bcell-naive,1.0,1.0,1.0,1.0,1.0,1.000000,1.0,1.0,1.0,1.0,...,1,1,1.0,1.0,1,1.0,1.0,1,1.0,1.0
IRIS_CD4Tcell-N0,1.0,1.0,1.0,1.0,1.0,0.000006,1.0,1.0,1.0,1.0,...,1,1,1.0,1.0,1,1.0,1.0,1,1.0,1.0
IRIS_CD4Tcell-Th1-restimulated12hour,1.0,1.0,1.0,1.0,1.0,1.000000,1.0,1.0,1.0,1.0,...,1,1,1.0,1.0,1,1.0,1.0,1,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PID_IL4_2PATHWAY,1.0,1.0,1.0,1.0,1.0,1.000000,1.0,1.0,1.0,1.0,...,1,1,1.0,1.0,1,1.0,1.0,1,1.0,1.0
REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR,1.0,1.0,1.0,1.0,1.0,1.000000,1.0,1.0,1.0,1.0,...,1,1,1.0,1.0,1,1.0,1.0,1,1.0,1.0
PID_BCR_5PATHWAY,1.0,1.0,1.0,1.0,1.0,1.000000,1.0,1.0,1.0,1.0,...,1,1,1.0,1.0,1,1.0,1.0,1,1.0,1.0
PID_TELOMERASEPATHWAY,1.0,1.0,1.0,1.0,1.0,1.000000,1.0,1.0,1.0,1.0,...,1,1,1.0,1.0,1,1.0,1.0,1,1.0,1.0


In [8]:
# Largest LV3 score remaining in the masked copy (Uuse).
max(Uuse.loc[:,"LV3"])

0.0

In [9]:
# Largest LV3 score in the original, unmasked U matrix, for comparison with Uuse.
max(plierRes["U"].loc[:,"LV3"])

0.0174877392318942

In [10]:
# Per-column maximum of the masked score matrix, one value per latent variable.
# A value of 0 means the column has no positive score left after masking.
mm = Uuse.max(axis=0)

In [11]:
def _top_positive_labels(scores, n):
    """Return the index labels of the ``n`` largest strictly positive entries of ``scores``.

    Entries are ranked in descending order and non-positive ones are removed
    (not merely blanked), so fewer than ``n`` labels are returned when fewer
    positive entries exist.
    """
    ranked = scores.sort_values(ascending=False)
    return list(ranked[ranked > 0].index[:n])


# Rebuild the labels from scratch so re-running this cell does not append duplicates.
names = []

for i in range(plierRes["U"].shape[1]):
    masked_col = Uuse.iloc[:, i]
    raw_col = plierRes["U"].iloc[:, i]
    # mm is label-indexed ("LV1", ...), so position i must go through .iloc.
    if mm.iloc[i] > 0:
        # Something survived the p-value mask: label with the top masked pathways,
        # e.g. "1,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY".
        names.append(f"{i+1}," + ",".join(_top_positive_labels(masked_col, top)))
    elif raw_col.max() > 0:
        # Nothing survived the mask, but the raw matrix has a positive score:
        # fall back to the unmasked ranking.
        names.append(f"{i+1}," + ",".join(_top_positive_labels(raw_col, top)))
    else:
        # No positive score at all for this column: use a generic label.
        names.append(f"LV {i+1}")

In [14]:
from copy import deepcopy

In [12]:
# Show the per-column maxima of Uuse; a 0 means that column has no positive entry.
mm

LV1     0.038920
LV2     0.298236
LV3     0.000000
LV4     0.000000
LV5     0.160069
LV6     0.155873
LV7     0.028599
LV8     0.236176
LV9     0.140063
LV10    0.008435
LV11    0.145258
LV12    0.000000
LV13    0.000000
LV14    0.136738
LV15    0.203218
LV16    0.000000
LV17    0.000000
LV18    0.010834
LV19    0.000000
LV20    0.337195
LV21    0.000000
LV22    0.000000
LV23    0.161855
LV24    0.000000
LV25    0.000000
LV26    0.123742
LV27    0.227063
LV28    0.000000
LV29    0.001722
LV30    0.145989
dtype: float64

In [13]:
# Show the labels collected in `names`, one per column of U.
names

['1,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY',
 '2,IRIS_Neutrophil-Resting',
 '3,MIPS_SPLICEOSOME',
 '4,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY',
 '5,IRIS_Monocyte-Day0',
 '6,DMAP_TCELLA6',
 '7,REACTOME_PROCESSING_OF_CAPPED_INTRON_CONTAINING_PRE_MRNA',
 '8,IRIS_PlasmaCell-FromPBMC',
 '9,DMAP_NKA2',
 '10,IRIS_Neutrophil-Resting',
 '11,DMAP_ERY3',
 'LV 12',
 'LV 13',
 '14,DMAP_TCELLA1',
 '15,IRIS_Bcell-naive',
 'LV 16',
 'LV 17',
 '18,IRIS_Neutrophil-Resting',
 'LV 19',
 '20,DMAP_MEGA2',
 'LV 21',
 'LV 22',
 '23,MIPS_55S_RIBOSOME_MITOCHONDRIAL',
 '24,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY',
 'LV 25',
 '26,DMAP_DENDA1',
 '27,REACTOME_INTERFERON_ALPHA_BETA_SIGNALING',
 'LV 28',
 '29,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY',
 '30,REACTOME_PEPTIDE_CHAIN_ELONGATION']

In [24]:
# Show how many pathway names are kept per label.
top

1

In [39]:
# NOTE(review): scratch cell. It appends an "LV"-prefixed label for whatever column `i`
# currently points at, so it relies on `i`, `names`, `Uuse` and `top` left in the kernel by
# other cells, and its execution count is out of order with the cells around it.
# Run after the naming loop, it adds one extra entry to `names` - confirm whether it should stay.
names.append(f"LV{i+1}," + ",".join(Uuse.iloc[:,i].sort_values(ascending=False).where(lambda x: x > 0).index[0:top]))

In [40]:
# Show `names` after the scratch append.
# NOTE(review): the recorded output has a single entry, so `names` was presumably reset
# by a cell that is not part of this notebook any more - confirm.
names

['LV1,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY']

In [33]:
# Scratch check: the first `top` index labels of column `i` after a descending sort,
# shown as a plain list (the un-joined form).
# NOTE: `.where` replaces non-positive values with NaN but keeps their labels in the index.
list(Uuse.iloc[:,i].sort_values(ascending=False).where(lambda x: x > 0).index[0:top])

['REACTOME_GENERIC_TRANSCRIPTION_PATHWAY']

In [30]:
# Scratch check: comma-join the first `top` index labels of column `i` (sorted descending) into one string.
",".join(Uuse.iloc[:,i].sort_values(ascending=False).where(lambda x: x > 0).index[0:top])

'REACTOME_GENERIC_TRANSCRIPTION_PATHWAY'