In [1]:
import numpy as np
import pandas as pd
from pathlib import Path

In [2]:
from math import floor

In [44]:
from scipy.linalg import solve
from statsmodels.stats.multitest import multipletests

In [4]:
from typing import Optional, Union
def crossprod(
    mat1: Union[np.array, pd.DataFrame],
    mat2: Optional[Union[np.array, pd.DataFrame]] = None,
) -> Union[np.array, pd.DataFrame]:

    if mat2 is None:
        return mat1.transpose() @ mat1
    else:
        return mat1.tranpose() @ mat2

def tcrossprod(
    mat1: Union[np.array, pd.DataFrame],
    mat2: Optional[Union[np.array, pd.DataFrame]] = None,
) -> Union[np.array, pd.DataFrame]:

    if mat2 is None:
        return mat1 @ mat1.transpose()
    else:
        return mat1 @ mat2.tranpose()

In [5]:
def copyMat(df: pd.DataFrame, zero: bool = False) -> pd.DataFrame:
    if zero:
        dfnew = pd.DataFrame(
            np.zeros(shape=df.shape), index=df.index, columns=df.columns
        )
    else:
        dfnew = df.copy(deep=True)

    return dfnew

In [6]:
from typing import Dict, Tuple

import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu, norm


def AUC(labels: pd.Series, values: pd.Series) -> Dict[str, float]:
    posii = labels[labels > 0]
    negii = labels[labels <= 0]
    posn = len(posii)
    negn = len(negii)
    posval = values[posii.index]
    negval = values[negii.index]
    if posn > 0 and negn > 0:
        statistic, pvalue = mannwhitneyu(posval, negval, alternative="greater")
        conf_int_low, conf_int_high = mannwhitneyu_conf_int(posval, negval)
        res = {
            "low": conf_int_low,
            "high": conf_int_high,
            "auc": (statistic / (posn * negn)),
            "pval": pvalue,
        }
    else:
        res = {"auc": 0.5, "pval": np.nan}

    return res


def mannwhitneyu_conf_int(
    x: np.array, y: np.array, alpha: float = 0.05
) -> Tuple[float, float]:
    """
    see: https://www.ncbi.nlm.nih.gov/labs/pmc/articles/PMC2545906/pdf/bmj00286-0037.pdf
    """
    n = len(x)
    m = len(y)

    N = norm.ppf(1 - alpha / 2)

    diffs = sorted([i - j for i in x for j in y])

    # For an approximate 100(1-a)% confidence interval first calculate K:
    nm = n * m
    top = nm * (n + m + 1)
    right = N * np.sqrt(top / 12)
    left = (n * m) / 2
    K = left - right

    # The Kth smallest to the Kth largest of the n x m differences
    # lx and ly should be > ~20
    return (diffs[round(K)], diffs[len(diffs) - round(K)])


In [7]:
dataPath = Path.cwd().parent / "tests" / "data"

plierRes_b = pd.read_csv(filepath_or_buffer=dataPath.joinpath("getAUC", "getAUC_b.csv"), index_col=0)
plierRes_c = pd.read_csv(filepath_or_buffer=dataPath.joinpath("getAUC", "getAUC_c.csv"), index_col=0)
plierRes_residual = pd.read_csv(filepath_or_buffer=dataPath.joinpath("getAUC", "getAUC_residual.csv"), index_col=0)
plierRes_u = pd.read_csv(filepath_or_buffer=dataPath.joinpath("getAUC", "getAUC_u.csv"), index_col=0)
plierRes_z = pd.read_csv(filepath_or_buffer=dataPath.joinpath("getAUC", "getAUC_z.csv"), index_col=0)
priorMat = pd.read_csv(filepath_or_buffer=dataPath.joinpath("getAUC", "getAUC_priormat.csv"), index_col=0)
data = pd.read_csv(filepath_or_buffer=dataPath.joinpath("getAUC", "getAUC_data.csv"), index_col=0)
L1 = 18.43058
L2 = 36.86117
L3 = 0.0005530844

plierRes = {
    "B": plierRes_b,
    "C": plierRes_c,
    "residual": plierRes_residual,
    "U": plierRes_u,
    "Z": plierRes_z,
    "heldOutGenes": list(),
    "L1": L1,
    "L2": L2,
    "L3": L3,
}

In [8]:
B = plierRes["B"]
Z = plierRes["Z"]
Zcv = Z.copy(deep=True)
k = Z.shape[1]
L1 = plierRes["L1"]
L2 = plierRes["L2"]
U = plierRes["U"]

In [9]:
for i in range(5):
    ii = [
        (_ * 5 + i) + 1
        for _ in range(floor(data.shape[0] / 5))
        if (_ * 5 + i) + 1 <= Z.shape[0]
    ]
    Z_not_ii = Z.loc[~Z.index.isin(Z.iloc[ii, :].index)]
    data_not_ii = data.loc[~data.index.isin(data.iloc[ii, :].index)]
    solve_a = crossprod(Z_not_ii) + L2 * np.identity(k)
    Bcv = (
        solve(solve_a, np.identity(solve_a.shape[0]))
        @ Z_not_ii.transpose()
        @ data_not_ii
    )
    solve_Bcv = tcrossprod(Bcv) + L1 * np.identity(k)
    Zcv.iloc[ii, :] = (
        data.iloc[ii, :]
        @ Bcv.transpose()
        @ solve(solve_Bcv, np.identity(solve_Bcv.shape[0]))
    )

In [7]:
i = 0

In [31]:
Zcv

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30
GAS6,0.000000,0.000000,0.022502,0.000000,0.103013,0.020453,0.000000,0.288617,0.000000,0.025918,...,0.000000,0.000000,0.000000,0.059422,0.066729,0.275235,0.093106,0.000000,0.000000,0.000000
MMP14,0.038050,0.014748,-0.055941,0.080094,0.288758,0.019190,-0.115848,0.048184,-0.025877,-0.012543,...,0.044078,-0.139670,0.112228,0.018012,0.126677,0.030868,-0.060554,-0.022092,0.044428,0.099449
MARCKSL1,-0.147623,-0.014834,-0.032980,-0.093735,-0.046514,0.051302,0.020376,-0.086933,-0.082585,-0.015860,...,0.064108,-0.044998,0.256823,-0.046767,0.025115,-0.061729,-0.030458,0.128204,-0.057130,0.121565
SPARC,-0.104378,-0.052494,-0.024552,-0.041341,-0.143555,0.004553,-0.006794,-0.020849,-0.006580,-0.047561,...,-0.003705,-0.088583,-0.110099,-0.074927,-0.005324,-0.078259,-0.030697,-0.054675,-0.001710,0.047357
CTSD,-0.091206,0.048282,-0.048897,-0.102790,0.172774,-0.099107,-0.085798,-0.015955,0.005584,-0.084155,...,-0.026951,-0.017124,0.228117,0.006154,-0.056024,-0.095483,0.021388,0.010487,-0.020468,0.064834
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CFL2,0.090927,-0.065132,0.008361,-0.209056,-0.035109,-0.010070,0.079065,0.027552,0.112270,0.021615,...,-0.056714,0.020995,0.034295,0.035319,-0.074395,0.071437,0.146451,-0.051995,0.111582,0.127067
CFL1,-0.100206,-0.065607,-0.108225,-0.025494,0.138490,-0.080402,0.033448,-0.003613,0.079893,-0.024829,...,0.046077,-0.025520,0.201743,0.015301,0.037083,-0.094721,-0.040541,0.092377,-0.028161,0.092574
SELL,0.028093,0.168528,-0.022077,0.014427,0.008410,0.028436,-0.012936,-0.034964,-0.136232,0.086458,...,0.043979,0.072442,-0.025388,0.021547,-0.046604,-0.005054,0.189344,-0.033893,0.061054,-0.214933
GNGT2,-0.004245,-0.062409,0.000584,-0.063880,0.128631,-0.148998,-0.062279,-0.155118,0.299235,-0.050159,...,-0.040807,-0.092631,0.182112,-0.008000,0.073505,0.072234,0.086862,-0.047342,-0.060798,0.138393


In [15]:
ii = [
    (_ * 5 + i) + 1
    for _ in range(floor(data.shape[0] / 5))
    if (_ * 5 + i) + 1 <= Z.shape[0]
]

In [17]:
ii[-1]

5886

In [18]:
Z_not_ii = Z.loc[~Z.index.isin(Z.iloc[ii, :].index)]

In [19]:
data_not_ii = data.loc[~data.index.isin(data.iloc[ii, :].index)]

In [22]:
solve_a = crossprod(Z_not_ii) + L2 * np.identity(k)

In [23]:
Bcv = (solve(solve_a, np.identity(solve_a.shape[0])) @ Z_not_ii.transpose() @ data_not_ii)

In [24]:
Bcv

Unnamed: 0,BD8001,BD8002,BD8003,BD8004,BD8005,BD8006,BD8007,BD8008,BD8009,BD8010,...,BD8031,BD8032,BD8033,BD8034,BD8038,BD8041,BD8042,BD8043,BD8044,BD8045
0,0.947571,-0.132508,1.509004,2.393521,-0.035493,0.774027,-1.481193,-0.720251,0.259149,1.480255,...,1.375419,-0.178826,1.002101,1.107863,0.948194,-0.114035,-0.002131,-0.510999,-0.246629,-3.064323
1,-0.684255,-1.048453,0.223134,1.665656,0.859908,-0.340332,-2.384912,0.668305,-2.514738,-0.703167,...,2.226841,0.364954,-1.003077,-3.731396,0.403282,-2.321695,0.499669,-0.097069,-0.189895,0.322708
2,-0.109856,-0.773849,0.915137,-2.656212,-0.845622,-0.757291,-0.607072,-1.056017,-0.146299,-0.329871,...,-0.402049,-0.709115,-1.133967,-1.269042,1.782554,0.504082,0.970376,0.925436,-0.359141,0.048087
3,-1.199131,-0.87196,0.067912,-0.151084,0.356012,0.734038,-0.911627,0.611327,-0.42633,-1.200356,...,0.005185,-3.482031,0.988019,1.964162,0.05614,0.331753,-0.101162,0.637476,-0.225674,-0.277395
4,0.935333,0.105641,-0.880461,0.453122,0.620007,0.008609,0.398311,0.383927,-1.203476,1.025746,...,0.845117,-0.947889,-0.242429,-0.449987,-0.232797,0.124005,-1.58863,1.209809,0.760953,1.177316
5,-1.481214,-0.027966,0.009159,-0.38942,0.699828,0.521401,0.372682,-0.59859,0.856876,-0.017581,...,-0.274816,-0.428733,1.31139,1.790426,0.58834,0.096299,0.572137,-0.955384,-0.580788,-1.564856
6,2.535737,2.306588,1.124039,2.174566,0.550771,1.854271,-1.395798,0.08491,1.901553,-1.458301,...,0.623765,1.893398,0.471117,1.220886,0.171491,0.565528,0.27446,-0.566941,0.34483,-2.442336
7,0.05681,0.006803,3.904333,-0.19631,-0.815283,-0.557497,1.241831,1.016292,0.58324,0.04249,...,-0.760259,1.114141,0.505069,0.644206,-0.430771,0.482332,-0.848589,-0.00625,0.582876,0.078328
8,0.430997,0.394726,-0.71469,-0.669204,-1.210188,0.817192,-0.10287,0.037972,-0.532598,-0.050293,...,-1.103035,0.96337,-0.66493,0.433333,0.023666,0.263625,0.209742,1.02315,-1.130835,-0.46085
9,-0.131339,0.113028,-0.243809,0.910379,0.034384,-0.509343,-0.667097,0.297875,0.125168,0.061894,...,1.097484,-0.07664,0.225062,-0.636541,0.287572,-0.309309,-0.228142,-0.94343,-0.702418,-0.067314


In [10]:
out = pd.DataFrame(
        data=np.empty(shape=(0, 4)), columns=["pathway", "LV index", "AUC", "p-value"]
    )
ii = U.loc[:, np.sum(a=U, axis=0) > 0].columns
Uauc = copyMat(df=U, zero=True)
Up = copyMat(df=U, zero=True)

In [13]:
for i in ii:
    iipath = U.loc[(U.loc[:, i] > 0), i].index
    for j in iipath:
        aucres = AUC(priorMat.loc[:, j], Zcv.loc[:, i])

        out = out.append(
            other=pd.DataFrame(
                {
                    "pathway": [j],
                    "LV index": [i],
                    "AUC": [aucres["auc"]],
                    "p-value": [aucres["pval"]],
                }
            )
        )

        Uauc.loc[j, i] = aucres["auc"]
        Up.loc[j, i] = aucres["pval"]

In [16]:
out["LV index"].apply(lambda x: x.strip("LV"))

0     1
0     1
0     1
0     1
0     1
     ..
0    30
0    30
0    30
0    30
0    30
Name: LV index, Length: 89, dtype: object

In [19]:
out["LV index"] = out["LV index"].apply(lambda x: x.strip("LV"))

In [20]:
out

Unnamed: 0,pathway,LV index,AUC,p-value
0,DMAP_ERY4,1,0.517544,1.477402e-01
0,DMAP_ERY5,1,0.518687,1.329293e-01
0,REACTOME_CELL_CYCLE,1,0.547871,1.736398e-03
0,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY,1,0.654224,2.947932e-18
0,REACTOME_IMMUNE_SYSTEM,1,0.531293,2.709537e-03
...,...,...,...,...
0,KEGG_RIBOSOME,30,0.970913,1.908210e-48
0,MIPS_NOP56P_ASSOCIATED_PRE_RRNA_COMPLEX,30,0.891128,6.922493e-40
0,KEGG_PARKINSONS_DISEASE,30,0.738336,1.414587e-15
0,REACTOME_TRANSLATION,30,0.913064,2.961842e-62


In [37]:
ii

Index(['LV1', 'LV2', 'LV3', 'LV4', 'LV5', 'LV6', 'LV7', 'LV8', 'LV9', 'LV10',
       'LV11', 'LV14', 'LV15', 'LV16', 'LV18', 'LV20', 'LV23', 'LV26', 'LV27',
       'LV29', 'LV30'],
      dtype='object')

In [39]:
"LV1".strip("LV")

'1'

In [21]:
Uauc

Unnamed: 0,LV1,LV2,LV3,LV4,LV5,LV6,LV7,LV8,LV9,LV10,...,LV21,LV22,LV23,LV24,LV25,LV26,LV27,LV28,LV29,LV30
IRIS_Bcell-Memory_IgG_IgA,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IRIS_Bcell-Memory_IgM,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IRIS_Bcell-naive,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IRIS_CD4Tcell-N0,0.0,0.0,0.0,0.0,0.0,0.797312,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IRIS_CD4Tcell-Th1-restimulated12hour,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PID_IL4_2PATHWAY,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PID_BCR_5PATHWAY,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PID_TELOMERASEPATHWAY,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
expected_Uauc = pd.read_csv(filepath_or_buffer=dataPath.joinpath("getAUC", "getAUC_uauc.csv"), index_col=0)

In [23]:
expected_Uauc

Unnamed: 0,LV1,LV2,LV3,LV4,LV5,LV6,LV7,LV8,LV9,LV10,...,LV21,LV22,LV23,LV24,LV25,LV26,LV27,LV28,LV29,LV30
IRIS_Bcell-Memory_IgG_IgA,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0
IRIS_Bcell-Memory_IgM,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0
IRIS_Bcell-naive,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0
IRIS_CD4Tcell-N0,0.0,0.0,0.0,0.0,0.0,0.797034,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0
IRIS_CD4Tcell-Th1-restimulated12hour,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PID_IL4_2PATHWAY,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0
REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0
PID_BCR_5PATHWAY,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0
PID_TELOMERASEPATHWAY,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0.0,0.0,0,0.0,0.0


In [26]:
606*0.0082508

4.999984800000001

In [24]:
pd.testing.assert_frame_equal(Uauc, expected_Uauc)

AssertionError: DataFrame.iloc[:, 0] (column name="LV1") are different

DataFrame.iloc[:, 0] (column name="LV1") values are different (0.82508 %)
[index]: [IRIS_Bcell-Memory_IgG_IgA, IRIS_Bcell-Memory_IgM, IRIS_Bcell-naive, IRIS_CD4Tcell-N0, IRIS_CD4Tcell-Th1-restimulated12hour, IRIS_CD4Tcell-Th1-restimulated48hour, IRIS_CD4Tcell-Th2-restimulated12hour, IRIS_CD4Tcell-Th2-restimulated48hour, IRIS_CD8Tcell-N0, IRIS_DendriticCell-Control, IRIS_DendriticCell-LPSstimulated, IRIS_MemoryTcell-RO-activated, IRIS_MemoryTcell-RO-unactivated, IRIS_Monocyte-Day0, IRIS_Monocyte-Day1, IRIS_Monocyte-Day7, IRIS_NKcell-IL15stimulated, IRIS_NKcell-IL2stimulated, IRIS_NKcell-control, IRIS_Neutrophil-Resting, IRIS_PlasmaCell-FromBoneMarrow, IRIS_PlasmaCell-FromPBMC, DMAP_BASO1, DMAP_BCELLA1, DMAP_BCELLA2, DMAP_BCELLA3, DMAP_BCELLA4, DMAP_CMP, DMAP_DENDA1, DMAP_DENDA2, DMAP_EOS2, DMAP_ERY1, DMAP_ERY2, DMAP_ERY3, DMAP_ERY4, DMAP_ERY5, DMAP_GMP, DMAP_GRAN1, DMAP_GRAN2, DMAP_GRAN3, DMAP_HSC1, DMAP_HSC3, DMAP_MEGA1, DMAP_MEGA2, DMAP_MEP, DMAP_MONO1, DMAP_MONO2, DMAP_NKA1, DMAP_NKA2, DMAP_NKA3, DMAP_NKA4, DMAP_PRE_BCELL2, DMAP_PRE_BCELL3, DMAP_TCELLA1, DMAP_TCELLA2, DMAP_TCELLA3, DMAP_TCELLA4, DMAP_TCELLA6, DMAP_TCELLA7, DMAP_TCELLA8, Custom_Treg, PID_CASPASE_PATHWAY, PID_P38ALPHABETADOWNSTREAMPATHWAY, REACTOME_APC_C_CDC20_MEDIATED_DEGRADATION_OF_MITOTIC_PROTEINS, REACTOME_REGULATION_OF_APOPTOSIS, REACTOME_IRON_UPTAKE_AND_TRANSPORT, PID_ERBB1_DOWNSTREAM_PATHWAY, KEGG_VASCULAR_SMOOTH_MUSCLE_CONTRACTION, KEGG_ERBB_SIGNALING_PATHWAY, PID_ERBB1_RECEPTOR_PROXIMAL_PATHWAY, BIOCARTA_PDGF_PATHWAY, BIOCARTA_CHREBP2_PATHWAY, REACTOME_G1_S_TRANSITION, REACTOME_RNA_POL_II_PRE_TRANSCRIPTION_EVENTS, REACTOME_RNA_POL_II_TRANSCRIPTION_PRE_INITIATION_AND_PROMOTER_OPENING, ST_P38_MAPK_PATHWAY, PID_TAP63PATHWAY, PID_REG_GR_PATHWAY, REACTOME_CLASS_B_2_SECRETIN_FAMILY_RECEPTORS, REACTOME_REGULATION_OF_WATER_BALANCE_BY_RENAL_AQUAPORINS, REACTOME_TRANSMISSION_ACROSS_CHEMICAL_SYNAPSES, REACTOME_METABOLISM_OF_PROTEINS, REACTOME_PI_3K_CASCADE, REACTOME_M_G1_TRANSITION, BIOCARTA_BIOPEPTIDES_PATHWAY, REACTOME_SIGNALING_BY_PDGF, REACTOME_GOLGI_ASSOCIATED_VESICLE_BIOGENESIS, BIOCARTA_INTEGRIN_PATHWAY, REACTOME_AMYLOIDS, REACTOME_PRE_NOTCH_EXPRESSION_AND_PROCESSING, PID_IL8CXCR2_PATHWAY, REACTOME_INSULIN_RECEPTOR_SIGNALLING_CASCADE, REACTOME_DIABETES_PATHWAYS, REACTOME_SIGNAL_TRANSDUCTION_BY_L1, KEGG_CYTOKINE_CYTOKINE_RECEPTOR_INTERACTION, REACTOME_SCFSKP2_MEDIATED_DEGRADATION_OF_P27_P21, REACTOME_GLYCOSAMINOGLYCAN_METABOLISM, PID_CXCR4_PATHWAY, KEGG_BASAL_CELL_CARCINOMA, REACTOME_MITOCHONDRIAL_PROTEIN_IMPORT, ...]
[left]:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5175443971488243, 0.5186873908648102, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...]
[right]: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.51752149061949, 0.518660394265233, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...]

In [27]:
Uauc["LV1"].compare(expected_Uauc["LV1"])

Unnamed: 0,self,other
DMAP_ERY4,0.517544,0.517521
DMAP_ERY5,0.518687,0.51866
REACTOME_CELL_CYCLE,0.547871,0.547764
REACTOME_GENERIC_TRANSCRIPTION_PATHWAY,0.654224,0.654048
REACTOME_IMMUNE_SYSTEM,0.531293,0.531199


In [32]:
Uauc["LV2"].compare(expected_Uauc["LV2"]).apply(lambda x: x["other"]/x["self"], axis=1)

IRIS_Neutrophil-Resting                           0.999636
DMAP_MONO2                                        0.999652
PID_IL6_7PATHWAY                                  0.999610
REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM      0.999676
KEGG_NATURAL_KILLER_CELL_MEDIATED_CYTOTOXICITY    0.999699
REACTOME_METABOLISM_OF_LIPIDS_AND_LIPOPROTEINS    0.999677
dtype: float64

As these are 99.9% in agreement, I'm chalking this up to rounding errors/seed difference/under-the-hood difference in how numpy calculates things

In [45]:
_, fdr, *_ = multipletests(out.loc[:, "p-value"], method="fdr_bh")
out.loc[:, "FDR"] = fdr

In [47]:
Uauc.to_csv(dataPath.joinpath("getAUC", "getAUC_uauc.csv"))
Up.to_csv(dataPath.joinpath("getAUC", "getAUC_up.csv"))
out.reset_index(drop=True).to_csv(dataPath.joinpath("getAUC", "getAUC_summary.csv"))