In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path

In [3]:
# Directory containing the solve_u test fixtures, resolved relative to the user's home directory.
solve_u_data = Path.home() / "workspace" / "pyplier" / "tests" / "data" / "solve_u"

In [4]:
# Read z from its gzipped CSV fixture, using the "gene" column as the row index.
z_file = solve_u_data.joinpath("z.csv.gz")
z = pd.read_csv(filepath_or_buffer=z_file, index_col="gene")

In [5]:
# Read chat from its gzipped CSV fixture, using the "pathway" column as the row index.
chat_file = solve_u_data.joinpath("chat.csv.gz")
chat = pd.read_csv(filepath_or_buffer=chat_file, index_col="pathway")

In [6]:
# Read the prior matrix (rows indexed by "gene") and name its column axis "pathway".
pm_file = solve_u_data.joinpath("prior_mat.csv.gz")
prior_mat = pd.read_csv(filepath_or_buffer=pm_file, index_col="gene").rename_axis(
    columns="pathway"
)

In [7]:
# Load the penalty factors from their gzipped text fixture into a NumPy array.
penalty_factor = np.loadtxt(fname=solve_u_data.joinpath("penalty_factor.csv.gz"))

In [8]:
# Read the u_complete fixture, using the "pathway" column as the row index.
u_file_complete = solve_u_data.joinpath("u_complete.csv.gz")
u_complete = pd.read_csv(filepath_or_buffer=u_file_complete, index_col="pathway")

In [9]:
# Strip the "V" from every column label, cast the remainder to int and shift it down by one.
# NOTE: this relabels in place, so it can only run once per load of u_complete.
u_complete.columns = u_complete.columns.str.replace("V", "").astype(int) - 1

In [10]:
# Cast every column of u_complete to 64-bit floats.
u_complete = u_complete.astype("float64")

In [11]:
# Read the u_fast fixture, using the "pathway" column as the row index.
u_file_fast = solve_u_data.joinpath("u_fast.csv.gz")
u_fast = pd.read_csv(filepath_or_buffer=u_file_fast, index_col="pathway")
# Strip the "V" from every column label, cast to int, shift down by one, and keep the
# resulting labels in an object-dtype index.
u_fast.columns = (u_fast.columns.str.replace("V", "").astype(int) - 1).astype(object)
# Cast every column of u_fast to 64-bit floats.
u_fast = u_fast.astype("float64")

In [12]:
# get U by OLS: matrix product of chat and z
ur = chat.dot(z)

In [13]:
# Rank the OLS estimate of U within each column, in descending order (largest value -> rank 1).
# The ranks are derived directly from `chat @ z` instead of from `ur` itself, so this cell
# is idempotent: re-running it can no longer rank already-ranked values, which would
# silently invert the ordering.
ur = (chat @ z).rank(axis="index", ascending=False)  # rank

In [14]:
# Integer positions of the rows of `ur` whose smallest rank across all columns is at most 10.
iip = np.flatnonzero(ur.min(axis=1) <= 10)

In [15]:
# Empty container for results.
results = dict()

In [16]:
# Zero-filled float array with one row per prior_mat column and one column per z column.
u = np.zeros((len(prior_mat.columns), len(z.columns)))

In [17]:
# Lambda path: exp() of the exponents -4, -4.125, ..., -12 (65 strictly decreasing values;
# the stop value -12.125 is excluded by arange).
lambdas = np.exp(np.arange(-4, -12.125, -0.125))

In [18]:
# NaN-filled array with one row per lambda and one column per z column.
l_mat = np.full(shape=(lambdas.size, z.shape[1]), fill_value=np.nan)

In [19]:
from glmnet import ElasticNet

In [20]:
# Positional index of the z column used as the response in the fit below (z.iloc[:, i]).
i = 0

In [22]:
# Earlier version of the ElasticNet fit cell, kept commented out for reference only.
# gres = ElasticNet(
#     alpha=0.9,
#     lower_limits=0,
#     lambda_path=lambdas,
#     fit_intercept=True,
#     standardize=False,
#     random_state=0
#     # max_features=150,
# )
# ...
# gres.fit(
#     X=prior_mat.iloc[:, iip],
#     y=z.iloc[:, i],
#     relative_penalties=penalty_factor[iip],
# )

In [21]:
# Configure the elastic-net model on the explicit `lambdas` path (alpha=0.9,
# lower_limits=0, intercept fitted, no standardisation, fixed random_state).
gres = ElasticNet(
    lambda_path=lambdas,
    alpha=0.9,
    lower_limits=0,
    standardize=False,
    fit_intercept=True,
    random_state=0,
    # max_features=150,
)
# Fit column `i` of z on the prior_mat columns selected by `iip`, passing the matching
# entries of penalty_factor as the relative penalties.
gres.fit(
    y=z.iloc[:, i],
    X=prior_mat.iloc[:, iip],
    relative_penalties=penalty_factor[iip],
)

In [300]:
# OLD DO NOT RERUN!
# NOTE(review): the saved output of this cell presumably comes from an earlier fit;
# re-executing would replace it with the coef_ of the current `gres`.
gres.coef_

array([0.0013753 , 0.        , 0.00272214, 0.00124865, 0.00508011,
       0.0036505 , 0.00104467, 0.00063562, 0.00019187, 0.00117809])

In [22]:
# Display the coef_ attribute of `gres` after the fit above.
gres.coef_

array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       1.63790450e-04, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.91897048e-03,
       8.53688131e-04, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.22410009e-04,
      

In [301]:
# OLD DO NOT RERUN
# NOTE(review): the saved output of this cell presumably comes from an earlier fit;
# re-executing would replace it with counts for the current `gres`.
# For each column of coef_path_, count how many entries are strictly positive.
np.sum(np.where(gres.coef_path_ > 0, 1, 0), axis=0)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 7, 7, 7, 8, 9, 9, 9, 9, 9])

In [23]:
# For each column of coef_path_, count how many entries are strictly positive.
np.sum(gres.coef_path_ > 0, axis=0)

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  2,  2,  3,
        6,  6,  6,  9, 14, 19, 19, 20, 26, 29, 30, 32, 36, 38])