# GTEx model with different priors

## Load libraries

In [2]:
# Install PLIER (wgmao/PLIER) from GitHub if it is not already installed.
if (!requireNamespace("PLIER", quietly = TRUE)) {
  devtools::install_github("wgmao/PLIER")
}

# Install PLIER2 (mchikina/PLIER2) from a local checkout if it is not already
# installed. Set the PLIER2_REPO_PATH environment variable to point at your own
# checkout; when it is unset or empty the default path below is used.
if (!requireNamespace("PLIER2", quietly = TRUE)) {
  REPO_PATH <- Sys.getenv("PLIER2_REPO_PATH", unset = "")
  if (!nzchar(REPO_PATH)) {
    REPO_PATH <- "/home/msubirana/Documents/pivlab/PLIER2"
  }

  # Fail with an actionable message instead of an opaque installer error.
  if (!file.exists(REPO_PATH)) {
    stop(
      "PLIER2 checkout not found at '", REPO_PATH,
      "'. Set PLIER2_REPO_PATH to the location of the PLIER2 repository.",
      call. = FALSE
    )
  }

  remotes::install_local(REPO_PATH, force = TRUE, dependencies = FALSE)
}

library(bigstatsr)
library(data.table)
library(dplyr)
library(rsvd)
library(glmnet)
library(Matrix)
library(knitr)
library(here)
library(PLIER2)

source(here("config.R"))


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix

Loaded glmnet 4.1-10

here() starts at /home/msubirana/Documents/pivlab/plier2-analyses



## Output directory

In [3]:
# Folder used by the cells below both to read the GTEx inputs and to write the
# fitted models; taken from `config` and created (with parents) when missing.
output_data_dir <- config$GTEx$DATASET_FOLDER
if (!dir.exists(output_data_dir)) {
  dir.create(output_data_dir, recursive = TRUE, showWarnings = FALSE)
}

## Load GTEx PLIER2 base model and inputs

In [4]:
# Previously saved PLIER2 base result; passed to PLIERfull() as
# `plier.base.result` in the cells below.
gtex_baseRes <- file.path(output_data_dir, "gtex_PLIER2_baseRes.rds") |>
  readRDS()

In [5]:
# Previously saved SVD result; passed to PLIERfull() as `svdres`.
gtex_svdRes <- file.path(output_data_dir, "gtex_svdRes.rds") |>
  readRDS()

In [6]:
# Previously saved value of k; passed to PLIERfull() as `k`.
PLIER_K_gtex <- file.path(output_data_dir, "PLIER_K_gtex.rds") |>
  readRDS()

In [7]:
# Previously saved GTEx data object; passed to PLIERfull() as `Y`.
gtex_fbm_filt <- file.path(output_data_dir, "gtex_fbm_filt.rds") |>
  readRDS()

In [8]:
# Previously saved gene identifiers; used by getMatchedPathwayMat() to match
# the prior matrix against the data.
gtex_genes <- file.path(output_data_dir, "gtex_genes.rds") |>
  readRDS()

In [9]:
# Previously saved sample labels; assigned as the column names of each fitted
# model's B matrix.
samples <- file.path(output_data_dir, "gtex_samples.rds") |>
  readRDS()

## One model per prior (KEGG and other Enrichr libraries)

In [None]:
# Enrichr gene-set libraries used as priors. One PLIER2 model is fitted per
# library; the library name is also the key in `results` and part of the
# output file name.
prior_libraries <- c(
  "KEGG_2021_Human",
  "GWAS_Catalog_2025",
  "GO_Biological_Process_2025",
  "GTEx_Tissues_V8_2023",
  "Chromosome_Location",
  "Human_Gene_Atlas",
  "LINCS_L1000_CRISPR_KO_Consensus_Sigs",
  "Metabolomics_Workbench_Metabolites_2022",
  "OMIM_Disease",
  "Proteomics_Drug_Atlas_2023",
  "TF_Perturbations_Followed_by_Expression",
  "UK_Biobank_GWAS_v1",
  "CellMarker_2024"
)

# Named vector of download URLs; names are exactly the Enrichr library names.
enrichr_url_prefix <-
  "https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName="
prior_urls <- setNames(
  paste0(enrichr_url_prefix, prior_libraries),
  prior_libraries
)

results <- list()
failed_priors <- character(0)

for (prior_name in names(prior_urls)) {
  url <- prior_urls[[prior_name]]
  message(">>> Running PLIER for prior: ", prior_name)

  # A failure for one prior must not abort the remaining ones, so errors are
  # captured here and reported per prior (and summarised after the loop).
  outcome <- tryCatch(
    {
      gmt <- getGMT(url)
      gmt_list <- list()
      gmt_list[[prior_name]] <- gmt

      pathMat <- gmtListToSparseMat(gmt_list)
      matched <- getMatchedPathwayMat(pathMat, gtex_genes)

      if (is.null(matched) || ncol(matched) == 0) {
        # Nothing to fit against: skip this prior without raising an error.
        warning(
          "No matched pathways for ", prior_name, "; skipping.",
          call. = FALSE
        )
        "skipped"
      } else {
        chatObj <- getChat(matched)

        fullRes <- PLIERfull(
          Y                 = gtex_fbm_filt,
          priorMat          = as.matrix(matched),
          svdres            = gtex_svdRes,
          plier.base.result = gtex_baseRes,
          Chat              = chatObj,
          k                 = PLIER_K_gtex,
          doCrossval        = TRUE,
          trace             = TRUE,
          max.U.updates     = config$GTEx$MAX_U_UPDATES
        )

        # Relabel the outputs: B columns take `samples`, Z columns and the
        # summary's LV column use the "LV<index>" naming.
        if (!is.null(fullRes$B)) {
          colnames(fullRes$B) <- samples
        }
        if (!is.null(fullRes$Z)) {
          colnames(fullRes$Z) <- paste0("LV", seq_len(ncol(fullRes$Z)))
        }
        if (!is.null(fullRes$summary)) {
          fullRes$summary <- fullRes$summary |>
            dplyr::rename(LV = LV_index) |>
            dplyr::mutate(LV = paste0("LV", LV))
        }

        out_file <- file.path(
          output_data_dir,
          sprintf("gtex_%s_PLIER2.rds", prior_name)
        )
        saveRDS(fullRes, file = out_file)
        results[[prior_name]] <- fullRes

        message("Saved: ", out_file)
        "ok"
      }
    },
    error = function(e) e
  )

  if (inherits(outcome, "error")) {
    message(
      "!!! PLIER failed for prior ", prior_name, ": ",
      conditionMessage(outcome)
    )
    failed_priors <- c(failed_priors, prior_name)
  }
}

# Make failures impossible to miss in a long trace.
if (length(failed_priors) > 0) {
  warning(
    "PLIER failed for prior(s): ", paste(failed_priors, collapse = ", "),
    call. = FALSE
  )
}

invisible(results)

>>> Running PLIER for prior: KEGG_2021_Human

Auto-detected name: KEGG_2021_Human

Using cached file for KEGG_2021_Human

There are 6409 genes in the intersection between data and prior

Removing 10 pathways

Inverting...

done

**PLIER v2 **

“`seed` is deprecated and ignored. Use set.seed(seed) before calling this function.”
using provided PLIERbase result

L1=45.2778145717356; L2=135.833443715207

Progress 1 / 350 | Bdiff=0.000413

Progress 2 / 350 | Bdiff=0.000367

, Number of annotated columns is 297

Progress 3 / 350 | Bdiff=0.000338

Progress 4 / 350 | Bdiff=0.000299

, Number of annotated columns is 302

Progress 5 / 350 | Bdiff=0.000278

Progress 6 / 350 | Bdiff=0.000250

, Number of annotated columns is 302

Progress 7 / 350 | Bdiff=0.000231

Progress 8 / 350 | Bdiff=0.000209

, Number of annotated columns is 304

Progress 9 / 350 | Bdiff=0.000191

Progress 10 / 350 | Bdiff=0.000188

, Number of annotated columns is 306

Progress 11 / 350 | Bdiff=0.000179

Progress 12 / 350 |

In [10]:
# Enrichr gene-set libraries to (re)run in this cell; here only CellMarker.
prior_libraries <- c(
  "CellMarker_2024"
)

# Named vector of download URLs; names are exactly the Enrichr library names.
enrichr_url_prefix <-
  "https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName="
prior_urls <- setNames(
  paste0(enrichr_url_prefix, prior_libraries),
  prior_libraries
)

# NOTE: this resets `results`, dropping any models held from an earlier cell
# (their .rds files on disk are not affected).
results <- list()
failed_priors <- character(0)

for (prior_name in names(prior_urls)) {
  url <- prior_urls[[prior_name]]
  message(">>> Running PLIER for prior: ", prior_name)

  # A failure for one prior must not abort the remaining ones, so errors are
  # captured here and reported per prior (and summarised after the loop).
  outcome <- tryCatch(
    {
      gmt <- getGMT(url)
      gmt_list <- list()
      gmt_list[[prior_name]] <- gmt

      pathMat <- gmtListToSparseMat(gmt_list)
      matched <- getMatchedPathwayMat(pathMat, gtex_genes)

      if (is.null(matched) || ncol(matched) == 0) {
        # Nothing to fit against: skip this prior without raising an error.
        warning(
          "No matched pathways for ", prior_name, "; skipping.",
          call. = FALSE
        )
        "skipped"
      } else {
        chatObj <- getChat(matched)

        fullRes <- PLIERfull(
          Y                 = gtex_fbm_filt,
          priorMat          = as.matrix(matched),
          svdres            = gtex_svdRes,
          plier.base.result = gtex_baseRes,
          Chat              = chatObj,
          k                 = PLIER_K_gtex,
          doCrossval        = TRUE,
          trace             = TRUE,
          max.U.updates     = config$GTEx$MAX_U_UPDATES
        )

        # Relabel the outputs: B columns take `samples`, Z columns and the
        # summary's LV column use the "LV<index>" naming.
        if (!is.null(fullRes$B)) {
          colnames(fullRes$B) <- samples
        }
        if (!is.null(fullRes$Z)) {
          colnames(fullRes$Z) <- paste0("LV", seq_len(ncol(fullRes$Z)))
        }
        if (!is.null(fullRes$summary)) {
          fullRes$summary <- fullRes$summary |>
            dplyr::rename(LV = LV_index) |>
            dplyr::mutate(LV = paste0("LV", LV))
        }

        out_file <- file.path(
          output_data_dir,
          sprintf("gtex_%s_PLIER2.rds", prior_name)
        )
        saveRDS(fullRes, file = out_file)
        results[[prior_name]] <- fullRes

        message("Saved: ", out_file)
        "ok"
      }
    },
    error = function(e) e
  )

  if (inherits(outcome, "error")) {
    message(
      "!!! PLIER failed for prior ", prior_name, ": ",
      conditionMessage(outcome)
    )
    failed_priors <- c(failed_priors, prior_name)
  }
}

# Make failures impossible to miss in a long trace.
if (length(failed_priors) > 0) {
  warning(
    "PLIER failed for prior(s): ", paste(failed_priors, collapse = ", "),
    call. = FALSE
  )
}

invisible(results)

>>> Running PLIER for prior: CellMarker_2024

Auto-detected name: CellMarker_2024

Using cached file for CellMarker_2024



There are 11124 genes in the intersection between data and prior

Removing 965 pathways

Inverting...

done

**PLIER v2 **

“`seed` is deprecated and ignored. Use set.seed(seed) before calling this function.”
using provided PLIERbase result

L1=45.2778145717356; L2=135.833443715207

Progress 1 / 350 | Bdiff=0.000413

Progress 2 / 350 | Bdiff=0.000367

, Number of annotated columns is 267

Progress 3 / 350 | Bdiff=0.000340

Progress 4 / 350 | Bdiff=0.000302

, Number of annotated columns is 269

Progress 5 / 350 | Bdiff=0.000279

Progress 6 / 350 | Bdiff=0.000247

, Number of annotated columns is 269

Progress 7 / 350 | Bdiff=0.000234

Progress 8 / 350 | Bdiff=0.000212

, Number of annotated columns is 272

Progress 9 / 350 | Bdiff=0.000196

Progress 10 / 350 | Bdiff=0.000188

, Number of annotated columns is 275

Progress 11 / 350 | Bdiff=0.000179

Progress 12 / 350 | Bdiff=0.000175

, Number of annotated columns is 272

Progress 13 / 350 | Bdiff=0.000162

Progress 14 / 350 | Bdiff=0.0

In [None]:
# Priors: Enrichr libraries combined into a single prior matrix. The vector
# names are the short prefixes prepended to every gene-set name below; the
# values are the Enrichr library names used in the download URL.
enrichr_libraries <- c(
  KEGG                   = "KEGG_2021_Human",
  GWAS_Catalog           = "GWAS_Catalog_2025",
  BP                     = "GO_Biological_Process_2025",
  GTEx_Tissues           = "GTEx_Tissues_V8_2023",
  Chromosome_Location    = "Chromosome_Location",
  Human_Gene_Atlas       = "Human_Gene_Atlas",
  LINCS_L1000_CRISPR_KO  = "LINCS_L1000_CRISPR_KO_Consensus_Sigs",
  Metabolomics_Workbench = "Metabolomics_Workbench_Metabolites_2022",
  OMIM_Disease           = "OMIM_Disease",
  Proteomics_Drug_Atlas  = "Proteomics_Drug_Atlas_2023",
  TF_Perturbations       = "TF_Perturbations_Followed_by_Expression",
  UK_Biobank_GWAS        = "UK_Biobank_GWAS_v1",
  CellMarker             = "CellMarker_2024"
)

enrichr_url_prefix <-
  "https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName="

# One getGMT() call per library; lapply() keeps the short names as list names.
gtex_gmtList <- lapply(
  enrichr_libraries,
  function(library_name) getGMT(paste0(enrichr_url_prefix, library_name))
)

# Prefix each gene-set name with its library to guarantee uniqueness
for (lib in names(gtex_gmtList)) {
  names(gtex_gmtList[[lib]]) <- paste0(lib, "_", names(gtex_gmtList[[lib]]))
}

gtex_pathMat <- gmtListToSparseMat(gtex_gmtList)
gtex_matched <- getMatchedPathwayMat(gtex_pathMat, gtex_genes)

# Stop early with a clear message rather than failing somewhere inside the fit.
if (is.null(gtex_matched) || ncol(gtex_matched) == 0) {
  stop(
    "No pathways matched the GTEx genes; cannot fit the combined model.",
    call. = FALSE
  )
}

gtex_chatObj <- getChat(gtex_matched)

# PLIERfull on the combined prior, starting from the saved base result
gtex_fullRes <- PLIERfull(
  Y                 = gtex_fbm_filt,
  priorMat          = as.matrix(gtex_matched),
  svdres            = gtex_svdRes,
  plier.base.result = gtex_baseRes,
  Chat              = gtex_chatObj,
  k                 = PLIER_K_gtex,
  doCrossval        = TRUE,
  trace             = TRUE,
  max.U.updates     = config$GTEx$MAX_U_UPDATES
)

# Fix column names: B columns take `samples`; Z columns and the summary's LV
# column use the "LV<index>" naming.
colnames(gtex_fullRes$B) <- samples
colnames(gtex_fullRes$Z) <- paste0("LV", seq_len(ncol(gtex_fullRes$Z)))
gtex_fullRes$summary <- gtex_fullRes$summary |>
  dplyr::rename(LV = LV_index) |>
  dplyr::mutate(LV = paste0("LV", LV))

# Save
saveRDS(gtex_fullRes, file = file.path(output_data_dir, "gtex_all_PLIER2.rds"))

Auto-detected name: KEGG_2021_Human

Using cached file for KEGG_2021_Human

Auto-detected name: GWAS_Catalog_2025

Using cached file for GWAS_Catalog_2025

Auto-detected name: GO_Biological_Process_2025

Using cached file for GO_Biological_Process_2025

Auto-detected name: GTEx_Tissues_V8_2023

Using cached file for GTEx_Tissues_V8_2023

Auto-detected name: Chromosome_Location

Using cached file for Chromosome_Location

Auto-detected name: Human_Gene_Atlas

Using cached file for Human_Gene_Atlas

Auto-detected name: LINCS_L1000_CRISPR_KO_Consensus_Sigs

Using cached file for LINCS_L1000_CRISPR_KO_Consensus_Sigs

Auto-detected name: Metabolomics_Workbench_Metabolites_2022

Using cached file for Metabolomics_Workbench_Metabolites_2022

Auto-detected name: OMIM_Disease

Using cached file for OMIM_Disease

Auto-detected name: Proteomics_Drug_Atlas_2023

Using cached file for Proteomics_Drug_Atlas_2023

Auto-detected name: TF_Perturbations_Followed_by_Expression

Using cached file for TF_Pe

Progress 81 / 350 | Bdiff=0.000191 Number of annotated columns is 163, Number of annotated columns is 163, Number of annotated columns is 164, Number of annotated columns is 165, Number of annotated columns is 167, Number of annotated columns is 162, Number of annotated columns is 162, Number of annotated columns is 164, Number of annotated columns is 165, Number of annotated columns is 165, Number of annotated columns is 163, Number of annotated columns is 164, Number of annotated columns is 164, Number of annotated columns is 165, Number of annotated columns is 167, Number of annotated columns is 164, Number of annotated columns is 165, Number of annotated columns is 168, Number of annotated columns is 168, Number of annotated columns is 170, Number of annotated columns is 167, Number of annotated columns is 167, Number of annotated columns is 168, Number of annotated columns is 170, Number of annotated columns is 172, Number of annotated columns is 169, Number of annotated columns i

converged at  iteration 81 Bdiff is not decreasing

Updating Z for CV

crossValidation

There are 105  LVs with AUC>0.70

There are 60  LVs with AUC>0.90



In [4]:
# Reload three of the per-prior models. They are read from output_data_dir,
# the same folder the per-prior loop saves "gtex_<library>_PLIER2.rds" into,
# so reading and writing cannot drift apart.
gtex_GO_BP_PLIER2 <- readRDS(
  file.path(output_data_dir, "gtex_GO_Biological_Process_2025_PLIER2.rds")
)
gtex_KEGG_PLIER2 <- readRDS(
  file.path(output_data_dir, "gtex_KEGG_2021_Human_PLIER2.rds")
)
gtex_GTEx_Tissues_PLIER2 <- readRDS(
  file.path(output_data_dir, "gtex_GTEx_Tissues_V8_2023_PLIER2.rds")
)

In [10]:
# Models to export; each list name becomes a sub-directory of output_data_dir.
models <- list(
  gtex_GO_BP_PLIER2 = gtex_GO_BP_PLIER2,
  gtex_KEGG_PLIER2 = gtex_KEGG_PLIER2,
  gtex_GTEx_Tissues_PLIER2 = gtex_GTEx_Tissues_PLIER2
)

# Write each model's B, Z and summary components as CSV files.
for (model_name in names(models)) {
  model <- models[[model_name]]
  model_dir <- file.path(output_data_dir, model_name)
  dir.create(model_dir, showWarnings = FALSE, recursive = TRUE)

  # B and Z are written with their row names.
  B <- as.data.frame(model$B)
  write.csv(B, file.path(model_dir, "gtex_B.csv"), row.names = TRUE)

  Z <- as.data.frame(model$Z)
  write.csv(Z, file.path(model_dir, "gtex_Z.csv"), row.names = TRUE)

  # Named `lv_summary` rather than `summary` so base::summary() is not masked
  # in the global environment; written without row names.
  lv_summary <- model$summary
  write.csv(
    lv_summary,
    file.path(model_dir, "gtex_summary.csv"),
    row.names = FALSE
  )
}