# ARCHS4 model building with PLIER2

## Load libraries

In [1]:
if (!requireNamespace("PLIER2", quietly = TRUE)) {
    REPO_PATH <- "/home/msubirana/Documents/pivlab/PLIER2" 
    remotes::install_local(REPO_PATH, force = TRUE, dependencies = FALSE)
}

library(bigstatsr)
library(data.table)
library(dplyr)
library(rsvd)
library(glmnet)
library(Matrix)
library(knitr)
library(here)
library(PLIER2)
library(hdf5r)
library(biomaRt)

source(here("config.R"))

set.seed(123)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix

Loaded glmnet 4.1-10

here() starts at /home/msubirana/Documents/pivlab/plier2-analyses



## Output directory

In [2]:
output_dir <- config$ARCHS4$DATASET_FOLDER
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

In [3]:
archs4_svdRes <- readRDS(file.path(output_dir, "archs4_svdRes.rds"))

In [4]:
archs4_fbm_filt <- readRDS(file.path(output_dir, "archs4_fbm_filt.rds"))

In [5]:
archs4_baseRes <- readRDS(file.path(output_dir, "archs4_baseRes.rds"))

In [6]:
PLIER_K_archs4 <- readRDS(file.path(output_dir, "PLIER_K_archs4.rds"))

In [7]:
archs4_genes <- readRDS(file.path(output_dir, "archs4_genes.rds"))

In [8]:
sample_names <- readRDS(file.path(output_dir, "archs4_samples.rds"))

## Prepare pathway priors

In [9]:
# # run localy since server not have internet access

archs4_gmtList <- list(
  KEGG = getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=KEGG_2021_Human"),
  BP = getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=GO_Biological_Process_2025"),
  GTEx_Tissues = getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=GTEx_Tissues_V8_2023")
)

# prefix each gene‐set name with its library to guarantee uniqueness
for(lib in names(archs4_gmtList)) {
  names(archs4_gmtList[[lib]]) <- paste0(lib, "_", names(archs4_gmtList[[lib]]))
}

archs4_pathMat <- gmtListToSparseMat(archs4_gmtList)
saveRDS(archs4_pathMat, file = file.path(output_dir, "archs4_pathMat.rds"))

Auto-detected name: KEGG_2021_Human

Using cached file for KEGG_2021_Human

Auto-detected name: GO_Biological_Process_2025

Using cached file for GO_Biological_Process_2025

Auto-detected name: GTEx_Tissues_V8_2023

Using cached file for GTEx_Tissues_V8_2023



In [10]:
archs4_pathMat <- readRDS(file.path(output_dir, "archs4_pathMat.rds"))
archs4_matched <- getMatchedPathwayMat(archs4_pathMat, archs4_genes)
archs4_chatObj <- getChat(archs4_matched)

There are 12486 genes in the intersection between data and prior



Removing 2409 pathways

Inverting...

done



In [11]:
saveRDS(archs4_pathMat, file = file.path(output_dir, "archs4_pathMat.rds"))
saveRDS(archs4_matched, file = file.path(output_dir, "archs4_matched.rds"))
saveRDS(archs4_chatObj, file = file.path(output_dir, "archs4_chatObj.rds"))

## PLIERfull

In [12]:
N_CORES <- config$ARCHS4$PLIER_PARAMS$N_CORES

archs4_fullRes <- PLIERfull(
  Y                 = archs4_fbm_filt,
  priorMat          = as.matrix(archs4_matched),
  svdres            = archs4_svdRes,
  plier.base.result = archs4_baseRes,
  Chat              = archs4_chatObj,
  k                 = PLIER_K_archs4,
  doCrossval        = TRUE,
  trace             = TRUE,
  max.U.updates     = 50,
  multiplier        = 3,
  ncores            = N_CORES
)

**PLIER v2 **

“`seed` is deprecated and ignored. Use set.seed(seed) before calling this function.”
using provided PLIERbase result

L1=111.157238320606; L2=37.0524127735352

Progress 1 / 350 | Bdiff=0.035954

Progress 2 / 350 | Bdiff=0.005951

, Number of annotated columns is 81

Progress 3 / 350 | Bdiff=0.002198

Progress 4 / 350 | Bdiff=0.001135

, Number of annotated columns is 89

Progress 5 / 350 | Bdiff=0.000741

Progress 6 / 350 | Bdiff=0.000557

, Number of annotated columns is 98

Progress 7 / 350 | Bdiff=0.000455

Progress 8 / 350 | Bdiff=0.000393

, Number of annotated columns is 99

Progress 9 / 350 | Bdiff=0.000361

Progress 10 / 350 | Bdiff=0.000327

, Number of annotated columns is 104

Progress 11 / 350 | Bdiff=0.000304

Progress 12 / 350 | Bdiff=0.000291

, Number of annotated columns is 83

Progress 13 / 350 | Bdiff=0.000276

Progress 14 / 350 | Bdiff=0.000269

, Number of annotated columns is 88

Progress 15 / 350 | Bdiff=0.000260

Progress 16 / 350 | Bdiff=0.000252

In [16]:
head(archs4_fullRes$summary) %>%  arrange(p_value)

Unnamed: 0_level_0,pathway,LV_index,AUC,p_value,FDR,npos,nneg
Unnamed: 0_level_1,<chr>,<int>,<dbl>,<dbl>,<dbl>,<int>,<int>
BP4736,BP_Regulation of Transforming Growth Factor Beta Receptor Signaling Pathway (GO:0017015),1,0.6419797,0.01577578,0.5728993,18,26153
BP5083,BP_Sterol Homeostasis (GO:0055092),1,0.6035796,0.1001172,0.7153877,12,26153
BP2435,BP_Negative Regulation of Vascular Associated Smooth Muscle Cell Proliferation (GO:1904706),1,0.5359328,0.42802824,0.8912681,2,26153
BP2993,BP_Positive Regulation of Collagen Biosynthetic Process (GO:0032967),1,0.5183057,0.46319504,0.8914728,2,26153
BP3507,BP_Positive Regulation of Vascular Associated Smooth Muscle Cell Proliferation (GO:1904707),1,0.5109102,0.47311888,0.8914728,3,26153
BP2151,BP_Negative Regulation of Interleukin-1 Beta Production (GO:0032691),1,0.5000287,0.49993192,0.8914728,4,26153


In [17]:
# save summary as CSV
data.table::fwrite(as.data.frame(archs4_fullRes$summary),
                   file = file.path(output_dir, "archs4_fullRes_summary.csv"))

In [13]:
archs4_fullRes$summary %>%  arrange(`p-value`)

ERROR: [1m[33mError[39m in `arrange()`:[22m
[1m[22m[36mℹ[39m In argument: `..1 = p-value`.
[1mCaused by error:[22m
[33m![39m object 'p-value' not found


In [None]:
saveRDS(archs4_fullRes, file = file.path(output_dir, "archs4_PLIER2.rds"))

# Fix col, row names and summary

In [None]:
archs4_fullRes <- readRDS(file.path(output_dir, "archs4_PLIER2.rds"))

colnames(archs4_fullRes$B) <- sample_names

colnames(archs4_fullRes$Z) <- paste0('LV', seq_len(ncol(archs4_fullRes$Z)))

archs4_fullRes$summary <- archs4_fullRes$summary %>%
    dplyr::rename(LV = `LV index`) %>% 
    dplyr::mutate(LV = paste0('LV', LV))

saveRDS(archs4_fullRes, file = file.path(output_dir, "archs4_PLIER2.rds"))

In [None]:
archs4_baseRes <- readRDS(file.path(output_dir, "archs4_baseRes.rds"))

colnames(archs4_baseRes$B) <- sample_names
colnames(archs4_baseRes$Z) <- paste0('LV', seq_len(ncol(archs4_fullRes$Z)))
rownames(archs4_baseRes$Z) <- archs4_genes
rownames(archs4_baseRes$B) <- paste0('LV', seq_len(ncol(archs4_baseRes$Z)))

saveRDS(archs4_baseRes, file = file.path(output_dir, "archs4_baseRes.rds"))

In [None]:
archs4_fullRes$summary  %>%  arrange(`p-value`)

pathway,LV,AUC,p-value,FDR
<chr>,<chr>,<dbl>,<dbl>,<dbl>
BP_Regulation of Canonical Wnt Signaling Pathway (GO:0060828),LV541,0.6456633,0.003006388,0.4680340
BP_Positive Regulation of SMAD Protein Signal Transduction (GO:0060391),LV229,0.8385313,0.004943516,0.4680340
BP_Cellular Response to Tumor Necrosis Factor (GO:0071356),LV284,0.7298559,0.007812690,0.4680340
BP_Negative Regulation of Cell Population Proliferation (GO:0008285),LV12,0.5836198,0.008895117,0.4680340
BP_Regulation of Focal Adhesion Assembly (GO:0051893),LV88,0.7475112,0.013385790,0.4680340
BP_Granulocyte Chemotaxis (GO:0071621),LV419,0.7221823,0.013673689,0.4680340
BP_Protein Import Into Nucleus (GO:0006606),LV52,0.7011634,0.013674025,0.4680340
BP_Cellular Response to Oxygen-Containing Compound (GO:1901701),LV259,0.5899547,0.013970482,0.4680340
BP_Detection of Chemical Stimulus Involved in Sensory Perception of Smell (GO:0050911),LV576,0.6824036,0.014162759,0.4680340
BP_Cellular Response to Acid Chemical (GO:0071229),LV425,0.9321450,0.014729899,0.4680340
