# GTEx model building with PLIER2 and PLIER

# PLIER2

## Load libraries

In [2]:
if (!requireNamespace("PLIER", quietly = TRUE)) {
    devtools::install_github("wgmao/PLIER")
}

# 3. Install PLIER2 (mchikina/PLIER2) if not already installed
if (!requireNamespace("PLIER2", quietly = TRUE)) {
    REPO_PATH <- "/home/msubirana/Documents/pivlab/PLIER2"  # adjust
    remotes::install_local(REPO_PATH, force = TRUE, dependencies = FALSE)
}

library(bigstatsr)
library(data.table)
library(dplyr)
library(rsvd)
library(glmnet)
library(Matrix)
library(knitr)
library(here)
library(PLIER)
library(PLIER2)

#source(here("R/solvers.R"))
#source(here("R/utilsNew.R"))
source(here("config.R"))


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix

Loaded glmnet 4.1-10

here() starts at /home/msubirana/Documents/pivlab/plier2-analyses

Loading required package: RColorBrewer

Loading required package: gplots


Attaching package: ‘gplots’


The following object is masked from ‘package:stats’:

    lowess


Loading required package: pheatmap

Loading required package: qvalue


Attaching package: ‘PLIER2’


The following objects are masked from ‘package:PLIER’:

    commonRows, num.pc, projectPLIER


The following object is masked from ‘package:bigstatsr’:

    AUC




## Output directory

In [3]:
output_data_dir <- config$GTEx$DATASET_FOLDER
dir.create(output_data_dir, showWarnings = FALSE, recursive = TRUE)

## Download recount2 

In [7]:
url      <- "https://ndownloader.figshare.com/files/10881866"
out_dir  <- here::here("data", "recount2")
zip_file <- file.path(out_dir, "recount2.zip")

if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE)

# Download if missing
if (!file.exists(zip_file)) {
  download.file(url, zip_file, mode = "wb", method = "libcurl")
}

# Unzip if the RDS files aren’t already there
rds1 <- file.path(out_dir, "recount_data_prep_PLIER.RDS")
rds2 <- file.path(out_dir, "recount_rpkm.RDS")

if (!file.exists(rds1) || !file.exists(rds2)) {
  unzip(zip_file, exdir = out_dir)
}


“downloaded length 0 != reported length 0”
“URL 'https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/10881866/recount2_PLIER_data.zip?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIYCQYOYV5JSSROOA/20250728/eu-west-1/s3/aws4_request&X-Amz-Date=20250728T194630Z&X-Amz-Expires=10&X-Amz-SignedHeaders=host&X-Amz-Signature=92872d3ae1328e657dc6e48979177dbedb463533be61e569344423cfa52dd279': Timeout of 60 seconds was reached”


ERROR: Error in download.file(url, zip_file, mode = "wb", method = "libcurl"): download from 'https://ndownloader.figshare.com/files/10881866' failed


In [6]:
url <- config$GTEx$URL
dest_dir <-  config$GTEx$DATASET_FOLDER
dest_gz  <- file.path(dest_dir, basename(url))

if (!file.exists(dest_gz)) {
  dir.create(dest_dir, recursive = TRUE, showWarnings = FALSE)
  download.file(url, dest_gz, mode = "wb")
  message("Downloaded to: ", dest_gz)
} else {
  message("File already exists, skipping download.")
}

File already exists, skipping download.



## Preprocess GTEx data

In [31]:
exprs_path  <- file.path(config$GTEx$DATASET_FOLDER, 'GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_tpm.gct.gz')
output_file <- config$GTEx$DATASET_FILE

if (!file.exists(output_file)) {
  dir.create(dirname(output_file), recursive = TRUE, showWarnings = FALSE)
  exprs_data <- read.table(exprs_path, header = TRUE, sep = "\t", skip = 2, check.names = FALSE)
  saveRDS(exprs_data, config$GTEx$DATASET_FILE)
  message("File successfully written to: ", config$GTEx$DATASET_FILE)
} else {
  message("Output file already exists. Skipping.")
}

# Aggregate in-place by 'description'
gtex <- readRDS(here(config$GTEx$DATASET_FILE))
gtex <- as.data.table(gtex)
aggregated_gtex <- gtex[, lapply(.SD, sum), by = Description, .SDcols = is.numeric]

genes <- aggregated_gtex$Description
samples <- colnames(aggregated_gtex[, -1])
data_mat <- as.matrix(aggregated_gtex[, -1])

# Create the FBM
fbm_file <- file.path(output_data_dir, "FBMgtex")
gtexFBM <- FBM(nrow = nrow(data_mat), ncol = ncol(data_mat), backingfile = fbm_file, create_bk = T)

# Populate it with data
block_size <- config$GENERAL$CHUNK_SIZE
n_blocks <- ceiling(nrow(aggregated_gtex) / block_size)

for (i in 1:n_blocks) {
  start_row <- (i-1) * block_size + 1
  end_row <- min(i * block_size, nrow(data_mat))
  
  gtexFBM[start_row:end_row, ] <- as.matrix(data_mat[start_row:end_row, ])
}

# Preprocess and z‑score FBM
prep_gtex <- preprocessPLIER2FBM(
  fbm        = gtexFBM,
  mean_cutoff= 0.5,
  var_cutoff = 0.1
)

gtex_fbm_filt <- prep_gtex$fbm_filtered
gtex_rowStats <- prep_gtex$rowStats
zscorePLIER2FBM(gtex_fbm_filt, gtex_rowStats)
gtex_genes <- genes[prep_gtex$kept_rows]

Output file already exists. Skipping.



Applying log2 transformation

No NA values found

Applying Z-score transformation



## SVD computation and SVD K estimation

In [32]:
g_fb <- nrow(gtex_fbm_filt)
samples_fb <- ncol(gtex_fbm_filt)
SVD_K_gtex  <- min(g_fb, samples_fb) - 1

message("Using SVD K = ", SVD_K_gtex)

set.seed(1)
gtex_svdRes <- big_randomSVD(gtex_fbm_filt, k = SVD_K_gtex)

Using SVD K = 17381



## Estimate K for PLIER

In [33]:
PLIER_K_gtex <- num.pc(list(d = gtex_svdRes$d))
message("Inferred PLIER K = ", PLIER_K_gtex)

Inferred PLIER K = 206



## PLIERbase initialization

In [34]:
gtex_baseRes <- PLIERbase(
  Y      = gtex_fbm_filt,
  k      = PLIER_K_gtex,
  svdres = gtex_svdRes,
  trace  = TRUE
)

****



[1] "L1 is set to 67.1332478272621"
[1] "L2 is set to 201.399743481786"
Progress 49 / 200 | Bdiff=0.000434, minCor=0.990968Converged at iteration= 49 | Bdiff=0.000434,  tol=0.000500     

## Prepare pathway priors

In [41]:
gtex_gmtList <- list(
  GTEx_Tissues = getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=GTEx_Tissues_V8_2023"),
  BP            = getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=GO_Biological_Process_2025"),
  MSigDB        = getGMT("https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=MSigDB_Hallmark_2020")
)

gtex_pathMat <- gmtListToSparseMat(gtex_gmtList)
gtex_matched <- getMatchedPathwayMat(gtex_pathMat, gtex_genes)
gtex_chatObj <- getChat(gtex_matched)

there are 13567 genes in the intersction between data and prior

Removing 2020 pathways

Presolving using dot method

done



## PLIERfull

In [42]:
gtex_fullRes <- PLIERfull(
  Y                 = gtex_fbm_filt,
  priorMat          = as.matrix(gtex_matched),
  svdres            = gtex_svdRes,
  plier.base.result = gtex_baseRes,
  Chat              = gtex_chatObj,
  k                 = PLIER_K_gtex,
  doCrossval        = TRUE,
  trace             = TRUE,
  max.U.updates=50
)

**PLIER v2 **

using provided PLIERbase result

L1=67.1332478272621; L2=201.399743481786



Progress 80 / 350 | Bdiff=0.000171 Number of annotated columns is 190, Number of annotated columns is 190, Number of annotated columns is 190, Number of annotated columns is 190, Number of annotated columns is 190, Number of annotated columns is 188, Number of annotated columns is 188, Number of annotated columns is 188, Number of annotated columns is 189, Number of annotated columns is 189, Number of annotated columns is 186, Number of annotated columns is 186, Number of annotated columns is 186, Number of annotated columns is 187, Number of annotated columns is 187, Number of annotated columns is 187, Number of annotated columns is 187, Number of annotated columns is 187, Number of annotated columns is 187, Number of annotated columns is 188, Number of annotated columns is 186, Number of annotated columns is 186, Number of annotated columns is 186, Number of annotated columns is 187, Number of annotated columns is 188, Number of annotated columns is 188, Number of annotated columns i

Bdiff is not decreasing



Progress 81 / 350 | Bdiff=0.00017189

Bdiff is not decreasing



Progress 82 / 350 | Bdiff=0.000177

Bdiff is not decreasing



Progress 83 / 350 | Bdiff=0.00017988

Bdiff is not decreasing



Progress 84 / 350 | Bdiff=0.000182

Bdiff is not decreasing



Progress 86 / 350 | Bdiff=0.00018789

Bdiff is not decreasing



Converged at 108 / 350 | Bdiff=0.000132, minCor=0.993962d columns is 189, Number of annotated columns is 190, Number of annotated columns is 190, Number of annotated columns is 190, Number of annotated columns is 190, Number of annotated columns is 190, Number of annotated columns is 190


Updating Z for CV

crossValidation

There are 162  LVs with AUC>0.70

There are 129  LVs with AUC>0.90



In [43]:
saveRDS(gtex_fullRes, file = file.path(output_data_dir, "gtex_PLIER2.rds"))

In [52]:
gtex_fullRes$summary  %>% 
dplyr::filter(FDR < 0.05) %>% 
dplyr::filter(AUC > 0.7)  %>% 
tail()

Unnamed: 0_level_0,pathway,LV index,AUC,p-value,FDR
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>
1050,Translation (GO:0006412),202,0.8599546,4.5513800000000005e-17,5.120303e-15
1051,Oxidative Phosphorylation,202,0.8838041,4.267331e-17,5.120303e-15
1052,Adipose - Visceral (Omentum) Male 30-39 Up,203,0.7972713,9.732122e-07,1.469616e-06
1053,Adipose - Visceral (Omentum) Female 30-39 Up,203,0.7227635,0.000181021,0.0002322988
1054,Adipose - Visceral (Omentum) Male 20-29 Up,203,0.7998422,7.930707e-07,1.207041e-06
1055,Fallopian Tube Female 40-49 Up,203,0.7646562,0.0004618164,0.0005772706


# PLIER

Run PLIER with the same inputs than PLIER2


In [53]:
gtex_plier = PLIER::PLIER(
    gtex_fbm_filt[], 
    as.matrix(gtex_matched), 
    svdres = gtex_svdRes, 
    Chat = as.matrix(gtex_chatObj), 
    doCrossval = TRUE, 
    max.iter = config$GTEx$PLIER_PARAMS$MAX_ITER,
    k = PLIER_K_gtex
  )

Removing 0 pathways with too few genes



[1] 201.3997
[1] "L2 is set to 201.399743481786"
[1] "L1 is set to 100.699871740893"


errorY (SVD based:best possible) = 0.3686

New L3 is 0.000804733010124613

New L3 is 0.000488095243523415

New L3 is 0.000430742540575688

New L3 is 0.000430742540575688

New L3 is 0.000430742540575688

New L3 is 0.000380128957869464

New L3 is 0.000430742540575688

New L3 is 0.000380128957869464

Bdiff is not decreasing

Bdiff is not decreasing

Bdiff is not decreasing

Bdiff is not decreasing

Bdiff is not decreasing

Bdiff is not decreasing

converged at  iteration 179 Bdiff is not decreasing

There are 132  LVs with AUC>0.70



In [54]:
saveRDS(gtex_plier, file = file.path(output_data_dir, "gtex_PLIER.rds"))