In [2]:
import warnings
from copy import deepcopy
from typing import Optional, TypedDict

import numpy as np
import pandas as pd
from glmnet import ElasticNet

In [3]:
class solveUReturnDict(TypedDict):
    """Typed mapping with exactly two keys, ``U`` and ``L3``.

    NOTE(review): the name suggests this is the return value of a ``solveU``
    routine, but no such function is visible in this notebook -- confirm.
    """

    # Data frame stored under "U" (later cells build a frame named U with one
    # row per priorMat column and one column per Z column).
    U: pd.DataFrame
    # Float stored under "L3"; presumably a regularisation constant -- TODO confirm.
    L3: float

In [4]:
# Default settings for the U-solving cells below.
# NOTE: pathwaySelection, glm_alpha and maxPath are restored from the IPython
# store in a later cell, and target_frac is reassigned from `frac`, so these
# literals only apply when those cells are not run.

# Candidate selection: "fast" picks candidates per column of Z from Ur;
# any other value uses the row-wise minimum rank (Urm) for every column.
pathwaySelection: str = "fast"
# Rank cutoff: only rows whose rank is <= maxPath are offered to the fit.
maxPath: int = 10

# Passed to ElasticNet as `alpha`.
glm_alpha: float = 0.9
# Desired fraction of Z columns with at least one positive coefficient;
# the lambda index whose fraction is closest to it is selected.
target_frac: float = 0.7

# Not read by any cell visible in this notebook.
L3: Optional[float] = None

In [7]:
%store -r Z
%store -r Chat
%store -r C
%store -r penalty_factor
%store -r pathwaySelection
%store -r glm_alpha
%store -r maxPath
%store -r frac

In [10]:
# Give the restored objects the names the cells below use:
# C is referred to as priorMat, and frac becomes the target fraction.
priorMat, target_frac = C, frac

In [11]:
# Initial estimate Chat @ Z (the original note calls this "U by OLS"), then
# rank the values within each column so the largest entry gets rank 1.
Ur = (Chat @ Z).rank(axis="index", ascending=False)

# Best (smallest) rank each row reaches in any column.
Urm = Ur.min(axis=1)

In [12]:
# Regularisation path exp(-4), exp(-4.125), ..., exp(-12): 65 values, decreasing.
lambdas = np.exp(np.arange(start=-4, stop=-12.125, step=-0.125))
# Fitted model per column of Z, keyed by the column position.
results = {}
# lMat[k, i] = number of strictly positive coefficients for column i at lambdas[k].
lMat = np.full((len(lambdas), Z.shape[1]), np.nan)

for i in range(Z.shape[1]):
    # Positions of the candidate predictors (columns of priorMat) for this fit:
    # "fast" uses the ranks of column i only, otherwise the row-wise best rank.
    if pathwaySelection == "fast":
        iip = np.flatnonzero((Ur.iloc[:, i] <= maxPath).to_numpy())
    else:
        iip = np.flatnonzero((Urm <= maxPath).to_numpy())

    # Every warning raised while building and fitting the model is silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        gres = ElasticNet(
            alpha=glm_alpha,
            lambda_path=lambdas,
            lower_limits=0,  # lower bound of 0 on the coefficients
            standardize=False,
            fit_intercept=True,
            max_features=150,
        )

        gres.fit(
            X=priorMat.iloc[:, iip].astype(np.float64).values,
            y=Z.iloc[:, i].astype(np.float64).values,
            relative_penalties=[penalty_factor[col] for col in iip],
        )

    # Keep the selected positions with the model so the coefficients can be
    # mapped back to rows of U later.
    gres.iip = iip
    lMat[:, i] = (gres.coef_path_ > 0).sum(axis=0)
    results[i] = deepcopy(gres)

# For each lambda, the fraction of Z columns with at least one positive coefficient.
fracs = (lMat > 0).mean(axis=1)
# First lambda index whose fraction is closest to target_frac.
iibest = np.argmin(np.abs(target_frac - fracs))

In [14]:
# R version
# for (i in 1:ncol(Z)) {
#   U[results[[i]]$iip, i] <- results[[i]]$beta[, iibest]
# } # for i

In [17]:
# Use the fit stored under key 0 for the inspection cells that follow.
i = 0

In [19]:
# Column positions of priorMat that were used as predictors in fit i.
results[i].iip

array([208, 210, 232, 252, 276, 289, 315, 326, 338, 350, 360, 371, 380,
       382, 418, 419, 425, 436, 451, 457, 466, 472, 480, 493, 516, 517,
       526, 527, 529, 531, 533, 535, 543, 555, 560, 572, 574, 575, 576,
       577, 578, 579, 580, 590])

In [31]:
# R: U <- matrix(0, nrow = ncol(priorMat), ncol = ncol(Z))
# All-zero frame with one row per priorMat column and one column per Z column.
U = pd.DataFrame(
    data=np.zeros(shape=(priorMat.shape[1], Z.shape[1])),
)

In [32]:
# Check the dimensions of U: (priorMat columns, Z columns).
U.shape

(606, 30)

In [33]:
# Entries of column i of U at the row positions selected for fit i.
U.iloc[results[i].iip,i]

208    0.0
210    0.0
232    0.0
252    0.0
276    0.0
289    0.0
315    0.0
326    0.0
338    0.0
350    0.0
360    0.0
371    0.0
380    0.0
382    0.0
418    0.0
419    0.0
425    0.0
436    0.0
451    0.0
457    0.0
466    0.0
472    0.0
480    0.0
493    0.0
516    0.0
517    0.0
526    0.0
527    0.0
529    0.0
531    0.0
533    0.0
535    0.0
543    0.0
555    0.0
560    0.0
572    0.0
574    0.0
575    0.0
576    0.0
577    0.0
578    0.0
579    0.0
580    0.0
590    0.0
Name: 0, dtype: float64

In [40]:
# Write the coefficients of fit i at lambda index iibest into column i of U,
# at the row positions of the predictors that fit was given.
U.iloc[results[i].iip, i] = results[i].coef_path_[:, iibest]

In [41]:
# Same slice of U as inspected earlier, to compare against the assigned coefficients.
U.iloc[results[i].iip,i]

208    0.014307
210    0.004602
232    0.000000
252    0.000000
276    0.000000
289    0.000000
315    0.000000
326    0.000000
338    0.000000
350    0.000000
360    0.000000
371    0.000000
380    0.000000
382    0.000000
418    0.000000
419    0.000000
425    0.000000
436    0.000000
451    0.000000
457    0.000000
466    0.000000
472    0.000000
480    0.000000
493    0.000000
516    0.000000
517    0.000000
526    0.000000
527    0.000000
529    0.000000
531    0.000000
533    0.000000
535    0.000000
543    0.000000
555    0.000000
560    0.000000
572    0.005191
574    0.000000
575    0.000000
576    0.000000
577    0.000000
578    0.000000
579    0.000000
580    0.000000
590    0.000000
Name: 0, dtype: float64

In [42]:
# Display U (still labelled with default integer row/column labels here).
U

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
602,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
603,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
604,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
# Label U: rows take the column labels of priorMat, columns take those of Z.
U.index, U.columns = priorMat.columns, Z.columns

In [45]:
# Display U with the row labels taken from priorMat.columns.
U

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
IRIS_Bcell-Memory_IgG_IgA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IRIS_Bcell-Memory_IgM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IRIS_Bcell-naive,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IRIS_CD4Tcell-N0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IRIS_CD4Tcell-Th1-restimulated12hour,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PID_IL4_2PATHWAY,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PID_BCR_5PATHWAY,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PID_TELOMERASEPATHWAY,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
# Coefficients of fit i at lambda index iibest (one value per entry of iip).
results[i].coef_path_[:,iibest]

array([0.0143065 , 0.00460198, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.00519114, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        ])

In [26]:
# Shape of the coefficient path of fit i; the recorded output (44, 65) matches
# the 44 entries of iip shown above and the 65 values in lambdas.
results[i].coef_path_.shape

(44, 65)

In [27]:
# Index into lambdas chosen as closest to target_frac.
iibest

30

In [None]:
# Python port of the R loop
#   for (i in 1:ncol(Z)) U[results[[i]]$iip, i] <- results[[i]]$beta[, iibest]
# For every column of Z, copy the coefficients of its fit at lambda index
# iibest into column i of U, at the row positions that fit used (iip).
# `.iloc` is positional, so this works whether or not U has been relabelled.
for i in range(Z.shape[1]):
    U.iloc[results[i].iip, i] = results[i].coef_path_[:, iibest]

In [None]:
# Build U in one expression: one row per column of priorMat (index named
# "pathway"), one column per column of Z (keyed by its position i).
# Each fit contributes its coefficients at lambda index iibest, labelled with
# the priorMat columns it was fitted on. Passing `index=` to the constructor
# aligns every Series to the full label set, and rows a fit did not use come
# out as NaN and are then set to 0.
# Labels come from priorMat.columns (not Ur.index) because iip was used to
# select columns of priorMat for the fit.
# Assumes the labels in priorMat.columns are unique; alignment fails otherwise.
U = (
    pd.DataFrame(
        data={
            i: pd.Series(
                data=results[i].coef_path_[:, iibest],
                index=priorMat.columns[results[i].iip],
            )
            for i in range(Z.shape[1])
        },
        index=priorMat.columns.set_names("pathway"),
    )
    .fillna(0)
)