In [1]:
from typing import Dict

import numpy as np
import pandas as pd
from statsmodels.stats.multitest import multipletests
from tqdm.auto import tqdm

from pyplier.AUC import AUC
from pyplier.copyMat import copyMat
from pyplier.PLIERRes import PLIERResults
from pathlib import Path

In [2]:
data_dir = Path("/workspaces/pyplier/")
plierRes = PLIERResults.from_disk(loc=data_dir / "crossval_plierres.json")

priorMat = pd.read_csv(data_dir / "crossval_priormat.csv")
priorMatcv = pd.read_csv(data_dir / "crossval_priormatcv.csv")

In [3]:
out = pd.DataFrame(
        data=np.empty(shape=(0, 4)), columns=["pathway", "LV index", "AUC", "p-value"]
    )
out_dict = dict()
ii = plierRes.U.loc[:, plierRes.U.sum(axis=0) > 0].columns
Uauc = copyMat(df=plierRes.U, zero=True)
Up = pd.DataFrame(np.ones(shape=plierRes.U.shape))

In [5]:
ii

Index(['LV1', 'LV2', 'LV3', 'LV4', 'LV5', 'LV7', 'LV8', 'LV10', 'LV12', 'LV14',
       'LV15', 'LV16', 'LV18', 'LV20', 'LV23', 'LV24', 'LV25', 'LV27', 'LV28',
       'LV29', 'LV30'],
      dtype='object')

In [6]:
i = 'LV1'
iipath = plierRes.U.loc[(plierRes.U.loc[:, i] > 0), i].index

In [7]:
iipath

Index(['REACTOME_IMMUNE_SYSTEM', 'KEGG_PATHWAYS_IN_CANCER',
       'REACTOME_SIGNALLING_BY_NGF',
       'REACTOME_TRANSMEMBRANE_TRANSPORT_OF_SMALL_MOLECULES'],
      dtype='object')

In [9]:
iipath[0]

'REACTOME_IMMUNE_SYSTEM'

In [11]:
# for j in tqdm(iipath):
iiheldout = (
    pd.concat([priorMat.loc[:, j], priorMatcv.loc[:, j]], axis=1)
    .apply(
        lambda x: True
        if (x[0] == 0) or ((x[0] > 0) and (x[1] == 0))
        else np.nan,  # use np.nan instead of False so that we can drop entries in the chain
        axis=1,
    )
    .dropna()
    .index
)

In [20]:
i

'LV1'

In [19]:
aucres = AUC(
    priorMat.loc[iiheldout, j], plierRes.Z.loc[plierRes.Z.index[iiheldout], i]
)

IndexError: positional indexers are out-of-bounds

In [23]:
from typing import Dict, Tuple

import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu, norm

In [24]:
# def AUC(labels: pd.Series, values: pd.Series) -> Dict[str, float]:
labels = priorMat.loc[iiheldout, j]
values = plierRes.Z.loc[plierRes.Z.index[iiheldout], i]

In [26]:
posii = labels[labels > 0]
negii = labels[labels <= 0]

In [27]:
posn = len(posii)
negn = len(negii)

In [28]:
posii.index

Int64Index([], dtype='int64')

In [None]:
posval = values[posii.index]
negval = values[negii.index]

In [None]:
if posn > 0 and negn > 0:
    statistic, pvalue = mannwhitneyu(posval, negval, alternative="greater")
    conf_int_low, conf_int_high = mannwhitneyu_conf_int(posval, negval)
    res = {
        "low": conf_int_low,
        "high": conf_int_high,
        "auc": (statistic / (posn * negn)),
        "pval": pvalue,
    }
else:
    res = {"auc": 0.5, "pval": np.nan}

return res

In [None]:
out_dict[j] = {
    "pathway": j,
    "LV index": i,
    "AUC": aucres["auc"],
    "p-value": aucres["pval"],
}
Uauc.loc[j, i] = aucres["auc"]
Up.loc[j, i] = aucres["pval"]

In [8]:
for i in tqdm(ii):
    iipath = plierRes.U.loc[(plierRes.U.loc[:, i] > 0), i].index
    if len(iipath) > 1:
        for j in tqdm(iipath):
            iiheldout = (
                pd.concat([priorMat.loc[:, j], priorMatcv.loc[:, j]], axis=1)
                .apply(
                    lambda x: True
                    if (x[0] == 0) or ((x[0] > 0) and (x[1] == 0))
                    else np.nan,  # use np.nan instead of False so that we can drop entries in the chain
                    axis=1,
                )
                .dropna()
                .index
            )
            aucres = AUC(
                priorMat.loc[iiheldout, j], plierRes.Z.loc[iiheldout, i]
            )
            out_dict[j] = {
                "pathway": j,
                "LV index": i,
                "AUC": aucres["auc"],
                "p-value": aucres["pval"],
            }
            Uauc.loc[j, i] = aucres["auc"]
            Up.loc[j, i] = aucres["pval"]

    else:
        j = iipath
        iiheldout = (
            pd.concat([priorMat.loc[:, j], priorMatcv.loc[:, j]], axis=1)
            .apply(
                lambda x: True
                if (x[0] == 0) or ((x[0] > 0) and (x[1] == 0))
                else np.nan,
                axis=1,
            )
            .dropna()
            .index
        )
        aucres = AUC(priorMat.loc[iiheldout, j], plierRes.Z.loc[iiheldout, i])
        out_dict[j] = {
            "pathway": j,
            "LV index": i,
            "AUC": aucres["auc"],
            "p-value": aucres["pval"],
        }
        Uauc.loc[j, i] = aucres["auc"]
        Up.loc[j, i] = aucres["pval"]
out = pd.DataFrame.from_dict(out_dict, orient="index")
_, fdr, *_ = multipletests(out.loc[:, "p-value"], method="fdr_bh")
out.loc[:, "fdr"] = fdr
return {"Uauc": Uauc, "Upval": Up, "summary": out}


IndentationError: unexpected indent (<string>, line 4)