In [48]:
import pandas as pd
import numpy as np
from pathlib import Path
from scipy.linalg import solve

from copy import deepcopy

In [49]:
from typing import TypedDict

from pandas import DataFrame

PLIERResults = TypedDict(
    "PlierResults",
    {
        "residual": DataFrame,
        "B": DataFrame,
        "Z": DataFrame,
        "U": DataFrame,
        "C": DataFrame,
        "L1": float,
        "L2": float,
        "L3": float,
        "heldOutGenes": dict[str, list[str]],
        "withPrior": dict[str, int],
        "Uauc": DataFrame,
        "Up": DataFrame,
        "summary": DataFrame,
    },
)

In [50]:
def getCutoff(plierRes: PLIERResults, fdr_cutoff: float = 0.01) -> float:
    return max(plierRes["summary"][plierRes["summary"]["FDR"] <= fdr_cutoff]["p-value"])

In [7]:
dataPath = Path("/Users/milessmith/workspace/pyplier/tests/data/common")
data = pd.read_csv(filepath_or_buffer=dataPath / "plierRes_data.csv", index_col=0)

In [52]:
plierRes_b = pd.read_csv(filepath_or_buffer=dataPath / "plierRes_b.csv", index_col=0)
plierRes_c = pd.read_csv(filepath_or_buffer=dataPath / "plierRes_c.csv", index_col=0)
plierRes_residual = pd.read_csv(
    filepath_or_buffer=dataPath / "plierRes_residual.csv", index_col=0
)
plierRes_u = pd.read_csv(filepath_or_buffer=dataPath / "plierRes_u.csv", index_col=0)
plierRes_z = pd.read_csv(filepath_or_buffer=dataPath / "plierRes_z.csv", index_col=0)
plierRes_heldoutgenes = pd.read_csv(
    filepath_or_buffer=dataPath / "plierRes_heldOutGenes.csv", index_col=0
)

plierRes_up = pd.read_csv(filepath_or_buffer=dataPath / "plierRes_up.csv", index_col=0)
plierRes_uauc = pd.read_csv(
    filepath_or_buffer=dataPath / "plierRes_uauc.csv", index_col=0
)
plierRes_summary = pd.read_csv(
    filepath_or_buffer=dataPath / "plierRes_summary.csv", index_col=0
)

priorMat = pd.read_csv(
    filepath_or_buffer=dataPath / "plierRes_priorMat.csv", index_col=0
)
pathwayMat = pd.read_csv(
    filepath_or_buffer=dataPath / "plierRes_pathwayMat.csv", index_col=0
)

In [53]:
plierRes = {
    "residual": plierRes_residual,
    "B": plierRes_b,
    "C": plierRes_c,
    "U": plierRes_u,
    "Z": plierRes_z,
    "L1": 18.5633,
    "L2": 37.12661,
    "L3": 0.0005530844,
    "Up": plierRes_up,
    "Uauc": plierRes_uauc,
    "summary": plierRes_summary,
    "heldOutGenes": {
        k: g["value"].tolist() for k, g in plierRes_heldoutgenes.groupby("name")
    },
}

In [88]:
ngenes = 50
auc_cutoff = 0.2
fdr_cutoff = 0.01

In [89]:
pathwayMat.head()

Unnamed: 0,IRIS_Bcell-Memory_IgG_IgA,IRIS_Bcell-Memory_IgM,IRIS_Bcell-naive,IRIS_CD4Tcell-N0,IRIS_CD4Tcell-Th1-restimulated12hour,IRIS_CD4Tcell-Th1-restimulated48hour,IRIS_CD4Tcell-Th2-restimulated12hour,IRIS_CD4Tcell-Th2-restimulated48hour,IRIS_CD8Tcell-N0,IRIS_DendriticCell-Control,...,KEGG_GNRH_SIGNALING_PATHWAY,KEGG_BASAL_TRANSCRIPTION_FACTORS,REACTOME_SYNTHESIS_OF_DNA,KEGG_HEMATOPOIETIC_CELL_LINEAGE,KEGG_T_CELL_RECEPTOR_SIGNALING_PATHWAY,PID_IL4_2PATHWAY,REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR,PID_BCR_5PATHWAY,PID_TELOMERASEPATHWAY,PID_PI3KPLCTRKPATHWAY
GAS6,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
MMP14,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
MARCKSL1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SPARC,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CTSD,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [90]:
plierRes["Z"].index

Index(['GAS6', 'MMP14', 'MARCKSL1', 'SPARC', 'CTSD', 'EPAS1', 'PALLD', 'PHC2',
       'LGALS3BP', 'SERPING1',
       ...
       'LDHA', 'LDHB', 'ACAP1', 'ACAP2', 'ACAP3', 'CFL2', 'CFL1', 'SELL',
       'GNGT2', 'SERPINH1'],
      dtype='object', length=5892)

In [91]:
plierRes["U"].index

Index(['IRIS_Bcell-Memory_IgG_IgA', 'IRIS_Bcell-Memory_IgM',
       'IRIS_Bcell-naive', 'IRIS_CD4Tcell-N0',
       'IRIS_CD4Tcell-Th1-restimulated12hour',
       'IRIS_CD4Tcell-Th1-restimulated48hour',
       'IRIS_CD4Tcell-Th2-restimulated12hour',
       'IRIS_CD4Tcell-Th2-restimulated48hour', 'IRIS_CD8Tcell-N0',
       'IRIS_DendriticCell-Control',
       ...
       'KEGG_GNRH_SIGNALING_PATHWAY', 'KEGG_BASAL_TRANSCRIPTION_FACTORS',
       'REACTOME_SYNTHESIS_OF_DNA', 'KEGG_HEMATOPOIETIC_CELL_LINEAGE',
       'KEGG_T_CELL_RECEPTOR_SIGNALING_PATHWAY', 'PID_IL4_2PATHWAY',
       'REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR', 'PID_BCR_5PATHWAY',
       'PID_TELOMERASEPATHWAY', 'PID_PI3KPLCTRKPATHWAY'],
      dtype='object', length=606)

In [92]:
pathwayMat_subset = pathwayMat.loc[plierRes["Z"].index, plierRes["U"].index]

In [93]:
pathwayMat_subset

Unnamed: 0,IRIS_Bcell-Memory_IgG_IgA,IRIS_Bcell-Memory_IgM,IRIS_Bcell-naive,IRIS_CD4Tcell-N0,IRIS_CD4Tcell-Th1-restimulated12hour,IRIS_CD4Tcell-Th1-restimulated48hour,IRIS_CD4Tcell-Th2-restimulated12hour,IRIS_CD4Tcell-Th2-restimulated48hour,IRIS_CD8Tcell-N0,IRIS_DendriticCell-Control,...,KEGG_GNRH_SIGNALING_PATHWAY,KEGG_BASAL_TRANSCRIPTION_FACTORS,REACTOME_SYNTHESIS_OF_DNA,KEGG_HEMATOPOIETIC_CELL_LINEAGE,KEGG_T_CELL_RECEPTOR_SIGNALING_PATHWAY,PID_IL4_2PATHWAY,REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR,PID_BCR_5PATHWAY,PID_TELOMERASEPATHWAY,PID_PI3KPLCTRKPATHWAY
GAS6,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
MMP14,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
MARCKSL1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SPARC,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CTSD,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CFL2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CFL1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SELL,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
GNGT2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [94]:
Uuse = plierRes["U"].copy()

In [95]:
Uuse

Unnamed: 0,LV1,LV2,LV3,LV4,LV5,LV6,LV7,LV8,LV9,LV10,...,LV21,LV22,LV23,LV24,LV25,LV26,LV27,LV28,LV29,LV30
IRIS_Bcell-Memory_IgG_IgA,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0
IRIS_Bcell-Memory_IgM,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0
IRIS_Bcell-naive,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0
IRIS_CD4Tcell-N0,0.0,0.0,0.0,0.0,0.0,0.085437,0.0,0.0,0.0,0.0,...,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0
IRIS_CD4Tcell-Th1-restimulated12hour,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PID_IL4_2PATHWAY,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0
REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0
PID_BCR_5PATHWAY,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0
PID_TELOMERASEPATHWAY,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0


In [96]:
pathwayMat = pathwayMat.loc[plierRes["Z"].index, plierRes["U"].index]
uarr = plierRes["U"].values
uarr = np.where(plierRes["U"] < auc_cutoff, 0, uarr)
uarr = np.where(plierRes["Up"] > getCutoff(plierRes, fdr_cutoff), 0, uarr)
Uuse = pd.DataFrame(data=uarr, index=plierRes["U"].index, columns=plierRes["U"].columns)
Uuse.index = plierRes["U"].index
Uuse.columns = plierRes["U"].columns
inpath = intop = plierRes["Z"].shape[1]

In [97]:
auc_cutoff

0.2

In [98]:
plierRes["U"].iloc[np.where(plierRes["U"] >= 0.2)]

Unnamed: 0,LV15,LV2,LV8,LV20,LV27
IRIS_Bcell-naive,0.203218,0.0,0.0,0.0,0.0
IRIS_Neutrophil-Resting,0.0,0.298236,0.0,0.0,0.0
IRIS_PlasmaCell-FromPBMC,0.0,0.0,0.236176,0.0,0.0
DMAP_MEGA2,0.0,0.0,0.0,0.337195,0.0
REACTOME_INTERFERON_ALPHA_BETA_SIGNALING,0.0,0.0,0.0,0.0,0.227063


In [None]:
for i in range(intop):
    iipath = np.where(Uuse.iloc[:, i] > 0)
    if len(iipath) > 0:
        pathGenes = rowsums(pathwayMat[, iipath, drop = F], parallel = TRUE) %>% which(. > 0) %>% names()
        pathGenes = pathwayMat.pathwayMat.iloc[:,iipath].apply(sum, axis="columns") > 0
            
        pathGenes = names(which(rowsums(pathwayMat[, iipath, drop = F], parallel = TRUE) > 0))
            
        topGenes = names(sort(plierRes$Z[, i], T)[1:ngenes])
        pathGenesInt = intersect(pathGenes, topGenes)
        inpath[i] = len(pathGenes)
        †intop[i] = len(pathGenesInt)

In [99]:
for i in range(intop):
    iipath = np.where(Uuse.iloc[:, i] > 0)
    if len(iipath) > 0:
        print(iipath)

(array([], dtype=int64),)
(array([19]),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([21]),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([2]),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([43]),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([338]),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)


In [103]:
i = 1

In [113]:
iipath = np.where(Uuse.iloc[:, i] > 0)[0]

In [114]:
iipath

array([19])

In [115]:
len(iipath) > 0

True

In [116]:
pathwayMat.iloc[:, iipath]

Unnamed: 0,IRIS_Neutrophil-Resting
GAS6,0
MMP14,0
MARCKSL1,0
SPARC,0
CTSD,0
...,...
CFL2,0
CFL1,0
SELL,0
GNGT2,0


In [None]:
pathGenes = pathwayMat.loc[pathwayMat.iloc[:,iipath].apply(sum, axis="columns") > 0,:].index
            
            
b
pathGenesInt = intersect(pathGenes, topGenes)
inpath[i] = len(pathGenes)
†intop[i] = len(pathGenesInt)

In [128]:
pathGenes = pathwayMat.loc[
    pathwayMat.iloc[:, iipath].apply(sum, axis="columns") > 0, :
].index

In [122]:
pathwayMat.loc[pathwayMat.iloc[:, iipath].apply(sum, axis="columns") > 0, :].index

Index(['PHC2', 'PPP4R1', 'CD14', 'PPP1R12B', 'SEC14L1', 'RALB', 'LIMK2',
       'UBE2B', 'RXRA', 'TNFAIP2',
       ...
       'PRAM1', 'CR1', 'MPZ', 'RNF2', 'PTPRN', 'RGS1', 'TEC', 'NLRP1', 'CD9',
       'PLAU'],
      dtype='object', length=285)

In [127]:
plierRes["Z"].iloc[:, i].sort_values(ascending=False)[1:ngenes].index

Index(['PLBD1', 'PPP4R1', 'PYGL', 'EMR3', 'NPL', 'C5AR1', 'GLT1D1', 'IGSF6',
       'REPS2', 'PLAUR', 'FCAR', 'MME', 'ARG1', 'SIRPB1', 'ZDHHC18', 'NDEL1',
       'CA4', 'NRBF2', 'DYSF', 'PGLYRP1', 'FRAT2', 'BMX', 'AQP9', 'TPST1',
       'PROK2', 'ORM1', 'PAK1', 'DGAT2', 'MMP25', 'CHST15', 'HAL', 'GPR27',
       'MPZL3', 'CCNJL', 'ALOX5', 'PPP1R3B', 'S100A12', 'PADI4', 'CREB5',
       'TLR8', 'DAPK2', 'SLC45A4', 'ST3GAL4', 'MBOAT2', 'TREM1', 'VNN2',
       'HSDL2', 'ADM', 'RALB'],
      dtype='object')

In [131]:
topGenes = plierRes["Z"].iloc[:, i].sort_values(ascending=False)[1:ngenes].index

In [132]:
topGenes.intersection(pathGenes)

Index(['PLBD1', 'PPP4R1', 'PYGL', 'EMR3', 'NPL', 'C5AR1', 'GLT1D1', 'IGSF6',
       'REPS2', 'PLAUR', 'FCAR', 'MME', 'ARG1', 'SIRPB1', 'ZDHHC18', 'NDEL1',
       'CA4', 'NRBF2', 'DYSF', 'PGLYRP1', 'FRAT2', 'BMX', 'AQP9', 'TPST1',
       'PROK2', 'ORM1', 'PAK1', 'DGAT2', 'MMP25', 'CHST15', 'HAL', 'GPR27',
       'MPZL3', 'CCNJL', 'ALOX5', 'PPP1R3B', 'S100A12', 'PADI4', 'CREB5',
       'TLR8', 'DAPK2', 'SLC45A4', 'ST3GAL4', 'MBOAT2', 'TREM1', 'VNN2',
       'HSDL2', 'ADM', 'RALB'],
      dtype='object')

In [139]:
alfa = np.zeros(4)
beta = np.zeros(4)
gamma = np.zeros(4)

In [145]:
pd.DataFrame(data={1: alfa, 2: beta, 3: gamma})

Unnamed: 0,1,2,3
0,0.0,0.0,0.0
1,0.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0


In [None]:
np.zeros(4)