# ARCHS4 model building with PLIER2

## Load libraries

In [1]:
# Install PLIER2 from a local checkout, but only when the package is not
# already available.
# NOTE(review): REPO_PATH is a machine-specific absolute path; adjust it when
# running on another machine.
if (!requireNamespace("PLIER2", quietly = TRUE)) {
  REPO_PATH <- "/home/msubirana/Documents/pivlab/PLIER2"
  remotes::install_local(
    path = REPO_PATH,
    dependencies = FALSE,
    force = TRUE
  )
}

library(bigstatsr)
library(data.table)
library(dplyr)
library(rsvd)
library(glmnet)
library(Matrix)
library(knitr)
library(here)
library(PLIER2)
library(hdf5r)
library(biomaRt)

# Run the project-level config script located at the project root.
# NOTE(review): presumably this is what defines the `config` object used in
# later cells -- confirm in config.R.
source(file = here("config.R"))

# Fix the RNG seed for the session.
set.seed(seed = 123)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix

Loaded glmnet 4.1-10

here() starts at /home/msubirana/Documents/pivlab/plier2-analyses



## Output directory

In [2]:
# Folder that holds every ARCHS4 artefact read or written by this notebook;
# created (including parent folders) if missing, silently kept if it exists.
output_dir <- config$ARCHS4$DATASET_FOLDER
dir.create(
  path = output_dir,
  recursive = TRUE,
  showWarnings = FALSE
)

In [None]:
# Previously saved SVD result; passed to PLIERfull() as `svdres` further down.
archs4_svdRes <- file.path(output_dir, "archs4_svdRes.rds") %>%
  readRDS()

In [None]:
# Previously saved filtered data object; passed to PLIERfull() as `Y`.
# NOTE(review): the name suggests a bigstatsr FBM -- confirm where it is built.
archs4_fbm_filt <- file.path(output_dir, "archs4_fbm_filt.rds") %>%
  readRDS()

In [None]:
# Previously saved base result; passed to PLIERfull() as `plier.base.result`.
archs4_baseRes <- file.path(output_dir, "archs4_baseRes.rds") %>%
  readRDS()

In [None]:
# Previously saved value that is passed to PLIERfull() as `k`.
PLIER_K_archs4 <- file.path(output_dir, "PLIER_K_archs4.rds") %>%
  readRDS()

In [4]:
# Gene identifiers for the ARCHS4 data; used to match the pathway prior and as
# row names of the base result's Z matrix.
archs4_genes <- file.path(output_dir, "archs4_genes.rds") %>%
  readRDS()

In [None]:
# Sample identifiers; used below as column names of the B matrices.
sample_names <- file.path(output_dir, "archs4_samples.rds") %>%
  readRDS()

## Prepare pathway priors

In [None]:
# # Run locally, since the server does not have internet access

# archs4_gmtList <- list(
#   KEGG = getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=KEGG_2021_Human"),
#   BP = getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=GO_Biological_Process_2025"),
#   GTEx_Tissues = getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=GTEx_Tissues_V8_2023")
# )

# # prefix each gene‐set name with its library to guarantee uniqueness
# for(lib in names(archs4_gmtList)) {
#   names(archs4_gmtList[[lib]]) <- paste0(lib, "_", names(archs4_gmtList[[lib]]))
# }

# archs4_pathMat <- gmtListToSparseMat(archs4_gmtList)
# saveRDS(archs4_pathMat, file = file.path(output_dir, "archs4_pathMat.rds"))

Auto-detected name: KEGG_2021_Human

Using cached file for KEGG_2021_Human

Auto-detected name: GO_Biological_Process_2025

Downloading GO_Biological_Process_2025 from Enrichr...

Auto-detected name: GTEx_Tissues_V8_2023

Using cached file for GTEx_Tissues_V8_2023



In [None]:
# Pathway prior inputs: read the stored gene-set matrix, match it against the
# ARCHS4 gene list, then derive the Chat object from the matched matrix.
archs4_pathMat <- file.path(output_dir, "archs4_pathMat.rds") %>%
  readRDS()
archs4_matched <- getMatchedPathwayMat(archs4_pathMat, archs4_genes)
archs4_chatObj <- getChat(archs4_matched)

There are 9109 genes in the intersection between data and prior



Removing 3024 pathways

Inverting...

done



In [None]:
# Persist the pathway prior objects; each one is written to
# <output_dir>/<object name>.rds.
prior_objects <- list(
  archs4_pathMat = archs4_pathMat,
  archs4_matched = archs4_matched,
  archs4_chatObj = archs4_chatObj
)
for (object_name in names(prior_objects)) {
  rds_path <- file.path(output_dir, paste0(object_name, ".rds"))
  saveRDS(prior_objects[[object_name]], file = rds_path)
}
# Drop the loop helpers so the workspace holds the same objects as before.
rm(prior_objects, object_name, rds_path)

## PLIERfull

In [None]:
# Number of cores handed to PLIERfull(), taken from the project config.
N_CORES <- config$ARCHS4$PLIER_PARAMS$N_CORES

# Run PLIERfull() on the filtered ARCHS4 data, reusing the objects prepared in
# earlier cells: the SVD result, the base result, the matched pathway prior
# (converted with as.matrix()) and its Chat object.
# NOTE(review): the meaning of each argument is defined by the PLIER2 package;
# confirm against its documentation before changing any value below.
archs4_fullRes <- PLIERfull(
  Y                 = archs4_fbm_filt,
  priorMat          = as.matrix(archs4_matched),
  svdres            = archs4_svdRes,
  plier.base.result = archs4_baseRes,
  Chat              = archs4_chatObj,
  k                 = PLIER_K_archs4,
  doCrossval        = TRUE,
  trace             = TRUE,
  max.U.updates     = 50,
  ncores            = N_CORES
)

**PLIER v2 **

“`seed` is deprecated and ignored. Use set.seed(seed) before calling this function.”
using provided PLIERbase result

L1=22.4924948346732; L2=67.4774845040197

Progress 1 / 350 | Bdiff=0.000471

Progress 2 / 350 | Bdiff=0.000449

, Number of annotated columns is 103

Progress 3 / 350 | Bdiff=0.000427

Progress 4 / 350 | Bdiff=0.000423

, Number of annotated columns is 112

Progress 5 / 350 | Bdiff=0.000422

Progress 6 / 350 | Bdiff=0.000414

, Number of annotated columns is 119

Progress 7 / 350 | Bdiff=0.000413

Progress 8 / 350 | Bdiff=0.000378

, Number of annotated columns is 124

Progress 9 / 350 | Bdiff=0.000366

Progress 10 / 350 | Bdiff=0.000347

, Number of annotated columns is 127

Progress 11 / 350 | Bdiff=0.000330

Progress 12 / 350 | Bdiff=0.000310

, Number of annotated columns is 114

Progress 13 / 350 | Bdiff=0.000302

Progress 14 / 350 | Bdiff=0.000284

, Number of annotated columns is 120

Progress 15 / 350 | Bdiff=0.000274

Progress 16 / 350 | Bdiff=0.

In [13]:
# Write the PLIERfull() result to <output_dir>/archs4_PLIER2.rds.
saveRDS(
  object = archs4_fullRes,
  file = file.path(output_dir, "archs4_PLIER2.rds")
)

## Fix column names, row names, and summary

In [14]:
# Reload the saved PLIERfull() result, attach readable names, and write it back
# to the same file.
archs4_fullRes <- readRDS(file.path(output_dir, "archs4_PLIER2.rds"))

# Columns of B are labelled with the sample names; columns of Z become
# LV1..LVk, where k is the number of columns of Z.
colnames(archs4_fullRes$B) <- sample_names
colnames(archs4_fullRes$Z) <- paste0("LV", seq_len(ncol(archs4_fullRes$Z)))

# Rename `LV index` to `LV` and prefix its values with "LV".
# The result overwrites the input file, so guard on the original column name:
# without the guard, re-running this cell fails in rename() because
# `LV index` no longer exists in the saved object.
if ("LV index" %in% colnames(archs4_fullRes$summary)) {
  archs4_fullRes$summary <- archs4_fullRes$summary %>%
    dplyr::rename(LV = `LV index`) %>%
    dplyr::mutate(LV = paste0("LV", LV))
}

saveRDS(archs4_fullRes, file = file.path(output_dir, "archs4_PLIER2.rds"))

In [15]:
# Reload the saved base result, attach names to its B and Z matrices, and
# write it back to the same file.
archs4_baseRes <- readRDS(file.path(output_dir, "archs4_baseRes.rds"))

# LV labels are sized from the base result's own Z matrix. The earlier version
# took the column count from archs4_fullRes$Z, which made this cell depend on
# an unrelated object and would fail if the two results had a different number
# of LVs.
base_lv_names <- paste0("LV", seq_len(ncol(archs4_baseRes$Z)))

# B: columns are labelled with sample names, rows with LV labels.
# Z: rows are labelled with gene names, columns with LV labels.
colnames(archs4_baseRes$B) <- sample_names
colnames(archs4_baseRes$Z) <- base_lv_names
rownames(archs4_baseRes$Z) <- archs4_genes
rownames(archs4_baseRes$B) <- base_lv_names

saveRDS(archs4_baseRes, file = file.path(output_dir, "archs4_baseRes.rds"))