# ARCHS4 model building with PLIER2

## Load libraries

In [1]:
# Install PLIER2 from a local checkout when the package is not yet available.
# The checkout location can be overridden with the PLIER2_REPO_PATH
# environment variable; when it is unset or empty, the original developer
# path is used, so existing setups keep working unchanged.
if (!requireNamespace("PLIER2", quietly = TRUE)) {
    REPO_PATH <- Sys.getenv("PLIER2_REPO_PATH", unset = "")
    if (!nzchar(REPO_PATH)) {
        REPO_PATH <- "/home/msubirana/Documents/pivlab/PLIER2"
    }

    # Fail early with an actionable message instead of an opaque installer
    # error when the checkout is missing on this machine.
    if (!file.exists(REPO_PATH)) {
        stop(
            "PLIER2 is not installed and no local copy was found at '",
            REPO_PATH,
            "'. Set the PLIER2_REPO_PATH environment variable to a PLIER2 checkout.",
            call. = FALSE
        )
    }

    # dependencies = FALSE: PLIER2's dependencies are expected to be installed
    # already (they are attached by the library() calls that follow).
    remotes::install_local(REPO_PATH, force = TRUE, dependencies = FALSE)
}

library(bigstatsr)
library(data.table)
library(dplyr)
library(rsvd)
library(glmnet)
library(Matrix)
library(knitr)
library(here)
library(PLIER2)
library(hdf5r)
library(biomaRt)

# Load the project configuration from config.R, resolved relative to the
# project root by here(). Later cells read settings from a `config` object,
# presumably defined by this file.
source(here("config.R"))

# Fix the RNG state once, up front, so that the stochastic steps that follow
# are reproducible.
set.seed(123)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix

Loaded glmnet 4.1-10

here() starts at /home/msubirana/Documents/pivlab/plier2-analyses



## Output directory

In [None]:
# Folder where this notebook reads and writes its ARCHS4 artifacts, taken
# from the project configuration.
output_dir <- config$ARCHS4$DATASET_FOLDER
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

# dir.create() is silenced so an already-existing folder is not reported, but
# that also hides genuine failures (permissions, bad path). Check explicitly
# so the notebook stops here rather than at a later readRDS()/saveRDS().
if (!dir.exists(output_dir)) {
  stop("Could not create output directory: ", output_dir, call. = FALSE)
}

In [None]:
# Pre-computed ARCHS4 artifacts (metadata, file-backed matrix, SVD) are read
# from a shared project folder that is separate from `output_dir`.
output_dir_milton <- "/pividori_lab/projects/plier2/plier2_repo/output/archs4"

# meta: dimensions and gene symbols of the filtered expression matrix
meta <- readRDS(file.path(output_dir_milton, "metadata_filtered.rds"))
n_genes_thin <- meta$n_genes_thin
n_samples <- meta$n_samples

# fbm: path of the filtered backing file, i.e. "<folder>/fbm_filtered"
# (given without extension, as bigstatsr expects)
fbm_file <- file.path(output_dir_milton, "fbm")
output_file <- paste0(fbm_file, "_filtered")

# create_bk = FALSE attaches to the existing backing file instead of creating
# a new one, so nrow/ncol must describe the matrix already stored on disk.
# The stray trailing comma of the original call is removed: it passed an
# empty extra argument that only worked because FBM() has further formals
# with defaults.
archs4_fbm_filt <- FBM(
  nrow        = n_genes_thin,
  ncol        = n_samples,
  backingfile = output_file,
  create_bk   = FALSE
)

# svd: pre-computed decomposition, later passed to PLIERfull() as `svdres`
archs4_svdRes <- readRDS(file.path(output_dir_milton, "svd.rds"))

# Gene symbols of the filtered matrix, used to match the pathway prior
archs4_genes <- meta$gene_symbols_thin

In [5]:
# Load the previously saved PLIER base result; it is handed to PLIERfull()
# further down as `plier.base.result`.
archs4_baseRes <- readRDS(file.path(output_dir, "archs4_baseRes.rds"))

In [6]:
# Load the saved value that is passed to PLIERfull() further down as `k`.
PLIER_K_archs4 <- readRDS(file.path(output_dir, "PLIER_K_archs4.rds"))

In [None]:
# Load the saved sample names. In this notebook they are only referenced by
# the commented-out clean-up cells at the end, which use them as column names
# of the B matrices.
sample_names <- readRDS(file.path(output_dir, "samples_archs4.rds"))

## Prepare pathway priors

In [None]:
# # run locally since the server does not have internet access

# archs4_gmtList <- list(
#   KEGG = getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=KEGG_2021_Human"),
#   BP = getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=GO_Biological_Process_2025"),
#   GTEx_Tissues = getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=GTEx_Tissues_V8_2023")
# )

# # prefix each gene‐set name with its library to guarantee uniqueness
# for(lib in names(archs4_gmtList)) {
#   names(archs4_gmtList[[lib]]) <- paste0(lib, "_", names(archs4_gmtList[[lib]]))
# }

# archs4_pathMat <- gmtListToSparseMat(archs4_gmtList)
# saveRDS(archs4_pathMat, file = file.path(output_dir, "archs4_pathMat.rds"))

Auto-detected name: KEGG_2021_Human

Using cached file for KEGG_2021_Human

Auto-detected name: GO_Biological_Process_2025

Using cached file for GO_Biological_Process_2025

Auto-detected name: GTEx_Tissues_V8_2023

Using cached file for GTEx_Tissues_V8_2023



In [None]:
# Load GMT files for each database
archs4_kegg_gmt <- getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=KEGG_2021_Human")
archs4_bp_gmt <- getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=GO_Biological_Process_2025")
archs4_gtex_tissues_gmt <- getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=GTEx_Tissues_V8_2023")

# Prefix each gene-set name with its library to guarantee uniqueness
names(archs4_kegg_gmt) <- paste0("KEGG_", names(archs4_kegg_gmt))
names(archs4_bp_gmt) <- paste0("BP_", names(archs4_bp_gmt))
names(archs4_gtex_tissues_gmt) <- paste0("GTEx_Tissues_", names(archs4_gtex_tissues_gmt))

# Save each GMT list separately
saveRDS(archs4_kegg_gmt, file = file.path(output_dir, "archs4_kegg_gmt.rds"))
saveRDS(archs4_bp_gmt, file = file.path(output_dir, "archs4_bp_gmt.rds"))
saveRDS(archs4_gtex_tissues_gmt, file = file.path(output_dir, "archs4_gtex_tissues_gmt.rds"))

In [None]:
# Load the combined pathway matrix. The file is produced by the commented-out
# "run locally" cell above (gmtListToSparseMat + saveRDS), so it must already
# exist in `output_dir`.
archs4_pathMat <- readRDS(file.path(output_dir, "archs4_pathMat.rds"))
# Match the prior against the gene symbols of the filtered expression matrix;
# the recorded output reports the size of the gene intersection and the
# number of pathways removed.
archs4_matched <- getMatchedPathwayMat(archs4_pathMat, archs4_genes)
# Build the Chat object from the matched prior (the recorded output shows an
# "Inverting..." step); it is passed to PLIERfull() below as `Chat`.
archs4_chatObj <- getChat(archs4_matched)

There are 12486 genes in the intersection between data and prior



Removing 2409 pathways

Inverting...

done



In [None]:
# Reload the per-library GMT lists written by the download cell above, so the
# notebook can continue without re-downloading them.
archs4_kegg_gmt <- readRDS(file.path(output_dir, "archs4_kegg_gmt.rds"))
archs4_bp_gmt <- readRDS(file.path(output_dir, "archs4_bp_gmt.rds"))
archs4_gtex_tissues_gmt <- readRDS(file.path(output_dir, "archs4_gtex_tissues_gmt.rds"))

## PLIERfull

In [None]:
# Number of cores for the fit, taken from the project configuration.
N_CORES <- config$ARCHS4$PLIER_PARAMS$N_CORES

# Fit the full PLIER2 model, reusing the artifacts loaded in earlier cells:
#   Y                 - file-backed filtered expression matrix (FBM)
#   priorMat          - matched pathway prior, coerced with as.matrix()
#                       (presumably from a sparse matrix - TODO confirm that
#                       the dense copy fits in memory on the target machine)
#   svdres            - pre-computed SVD read from svd.rds
#   plier.base.result - saved PLIER base result, reused instead of refitting
#                       (the recorded output says "using provided PLIERbase
#                       result")
#   Chat              - object returned by getChat() for the matched prior
#   k                 - saved value loaded from PLIER_K_archs4.rds
# doCrossval, max.U.updates and multiplier are passed through as-is; see the
# PLIER2 documentation for their exact meaning. trace = TRUE produces the
# per-iteration progress lines shown in the recorded output.
archs4_fullRes <- PLIERfull(
  Y                 = archs4_fbm_filt,
  priorMat          = as.matrix(archs4_matched),
  svdres            = archs4_svdRes,
  plier.base.result = archs4_baseRes,
  Chat              = archs4_chatObj,
  k                 = PLIER_K_archs4,
  doCrossval        = TRUE,
  trace             = TRUE,
  max.U.updates     = 50,
  multiplier        = 3,
  ncores            = N_CORES
)

**PLIER v2 **

“`seed` is deprecated and ignored. Use set.seed(seed) before calling this function.”
using provided PLIERbase result

L1=111.157238320606; L2=37.0524127735352

Progress 1 / 350 | Bdiff=0.035954

Progress 2 / 350 | Bdiff=0.005951

, Number of annotated columns is 81

Progress 3 / 350 | Bdiff=0.002198

Progress 4 / 350 | Bdiff=0.001135

, Number of annotated columns is 89

Progress 5 / 350 | Bdiff=0.000741

Progress 6 / 350 | Bdiff=0.000557

, Number of annotated columns is 98

Progress 7 / 350 | Bdiff=0.000455

Progress 8 / 350 | Bdiff=0.000393

, Number of annotated columns is 99

Progress 9 / 350 | Bdiff=0.000361

Progress 10 / 350 | Bdiff=0.000327

, Number of annotated columns is 104

Progress 11 / 350 | Bdiff=0.000304

Progress 12 / 350 | Bdiff=0.000291

, Number of annotated columns is 83

Progress 13 / 350 | Bdiff=0.000276

Progress 14 / 350 | Bdiff=0.000269

, Number of annotated columns is 88

Progress 15 / 350 | Bdiff=0.000260

Progress 16 / 350 | Bdiff=0.000252

In [None]:
# Persist the fitted model. The commented-out clean-up cell below reads this
# same file back and overwrites it after renaming columns.
saveRDS(archs4_fullRes, file = file.path(output_dir, "archs4_PLIER2.rds"))

## Fix column names, row names and summary

In [None]:
# archs4_fullRes <- readRDS(file.path(output_dir, "archs4_PLIER2.rds"))

# colnames(archs4_fullRes$B) <- sample_names

# colnames(archs4_fullRes$Z) <- paste0('LV', seq_len(ncol(archs4_fullRes$Z)))

# archs4_fullRes$summary <- archs4_fullRes$summary %>%
#     dplyr::rename(LV = `LV index`) %>% 
#     dplyr::mutate(LV = paste0('LV', LV))

# saveRDS(archs4_fullRes, file = file.path(output_dir, "archs4_PLIER2.rds"))

In [None]:
# archs4_baseRes <- readRDS(file.path(output_dir, "archs4_baseRes.rds"))

# colnames(archs4_baseRes$B) <- sample_names
# colnames(archs4_baseRes$Z) <- paste0('LV', seq_len(ncol(archs4_baseRes$Z)))
# rownames(archs4_baseRes$Z) <- archs4_genes
# rownames(archs4_baseRes$B) <- paste0('LV', seq_len(ncol(archs4_baseRes$Z)))

# saveRDS(archs4_baseRes, file = file.path(output_dir, "archs4_baseRes.rds"))