# GTEx model with different priors

## Load libraries

In [1]:
# Install the original PLIER package from GitHub when it is not available yet.
if (!requireNamespace("PLIER", quietly = TRUE)) {
    devtools::install_github("wgmao/PLIER")
}

# Install PLIER2 (mchikina/PLIER2) from a local checkout if not already installed.
# The checkout location can be overridden with the PLIER2_REPO_PATH environment
# variable; when it is unset or empty the original author's path is used.
if (!requireNamespace("PLIER2", quietly = TRUE)) {
    REPO_PATH <- Sys.getenv("PLIER2_REPO_PATH", unset = NA)
    if (is.na(REPO_PATH) || !nzchar(REPO_PATH)) {
        REPO_PATH <- "/home/msubirana/Documents/pivlab/PLIER2"
    }
    # install_local() accepts a directory or a package archive, so only
    # require that the path exists.
    if (!file.exists(REPO_PATH)) {
        stop(
            "PLIER2 source not found at '", REPO_PATH,
            "'. Set the PLIER2_REPO_PATH environment variable to a local ",
            "checkout of mchikina/PLIER2.",
            call. = FALSE
        )
    }
    remotes::install_local(REPO_PATH, force = TRUE, dependencies = FALSE)
}

library(bigstatsr)
library(data.table)
library(dplyr)
library(rsvd)
library(glmnet)
library(Matrix)
library(knitr)
library(here)
library(PLIER2)

source(here("config.R"))


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix

Loaded glmnet 4.1-10

here() starts at /home/msubirana/Documents/pivlab/plier2-analyses



## Output directory

In [2]:
# Folder used by this notebook for both reading and writing GTEx artefacts;
# the location comes from the project configuration loaded via config.R.
output_data_dir <- config$GTEx$DATASET_FOLDER

# Fail early with a clear message if the configuration entry is missing or empty.
if (!is.character(output_data_dir) || length(output_data_dir) != 1L ||
    is.na(output_data_dir) || !nzchar(output_data_dir)) {
  stop("config$GTEx$DATASET_FOLDER must be a single non-empty path.", call. = FALSE)
}

dir.create(output_data_dir, showWarnings = FALSE, recursive = TRUE)

# showWarnings = FALSE also hides genuine failures (e.g. permissions), so
# confirm the folder really exists before later cells try to use it.
if (!dir.exists(output_data_dir)) {
  stop("Could not create data directory: ", output_data_dir, call. = FALSE)
}

# Load inputs for the GTEx PLIER2 base model

In [3]:
# Stored SVD result, passed as `svdres` to PLIERbase() and PLIERfull() below.
gtex_svdRes <- readRDS(
  file = file.path(output_data_dir, "gtex_svdRes.rds")
)

In [4]:
# Number of latent variables, passed as `k` to PLIERbase() and PLIERfull() below.
# NOTE(review): 782 is hard-coded here; presumably it was chosen in an upstream
# step — confirm its origin.
PLIER_K_gtex <- 782

In [5]:
# Stored data matrix, passed as `Y` to PLIERbase() and PLIERfull() below.
# NOTE(review): the name suggests a filtered bigstatsr FBM — confirm.
gtex_fbm_filt <- readRDS(
  file = file.path(output_data_dir, "gtex_fbm_filt.rds")
)

In [6]:
# Gene identifiers of the data, used below to match the pathway prior to the data.
gtex_genes <- readRDS(
  file = file.path(output_data_dir, "gtex_genes.rds")
)

In [7]:
# Sample identifiers, used below as the column names of the fitted B matrix.
samples <- readRDS(
  file = file.path(output_data_dir, "gtex_samples.rds")
)

# PLIERbase model

In [8]:
# Fit the PLIER2 base model on the GTEx matrix, reusing the stored SVD result.
# `trace = TRUE` prints the per-iteration progress lines (Bdiff / minCor).
# NOTE(review): `ncores = 4` is hard-coded — presumably the number of parallel
# workers; adjust to the machine running the notebook.
gtex_baseRes <- PLIERbase(
  Y = gtex_fbm_filt,
  svdres = gtex_svdRes,
  k = PLIER_K_gtex,
  ncores = 4,
  trace = TRUE
)

****

L1 is set to 34.7581967384243

L2 is set to 104.274590215273

Progress 1 / 200 | Bdiff=0.298817, minCor=0.623430

Progress 2 / 200 | Bdiff=0.046200, minCor=0.898014

Progress 3 / 200 | Bdiff=0.018837, minCor=0.959064

Progress 4 / 200 | Bdiff=0.011856, minCor=0.975358

Progress 5 / 200 | Bdiff=0.008577, minCor=0.981941

Progress 6 / 200 | Bdiff=0.006701, minCor=0.986470

Progress 7 / 200 | Bdiff=0.005478, minCor=0.989727

Progress 8 / 200 | Bdiff=0.004617, minCor=0.991356

Progress 9 / 200 | Bdiff=0.003980, minCor=0.992366

Progress 10 / 200 | Bdiff=0.003489, minCor=0.993332

Progress 11 / 200 | Bdiff=0.003101, minCor=0.994171

Progress 12 / 200 | Bdiff=0.002788, minCor=0.994778

Progress 13 / 200 | Bdiff=0.002528, minCor=0.995673

Progress 14 / 200 | Bdiff=0.002307, minCor=0.995418

Progress 15 / 200 | Bdiff=0.002118, minCor=0.995102

Progress 16 / 200 | Bdiff=0.001960, minCor=0.995127

Progress 17 / 200 | Bdiff=0.001827, minCor=0.995394

Progress 18 / 200 | Bdiff=0.001712, minC

In [9]:
# Persist the base-model fit next to the other GTEx artefacts.
saveRDS(
  object = gtex_baseRes,
  file = file.path(output_data_dir, "gtex_prior_PLIER_PLIER2_baseRes.rds")
)

# Priors

In [10]:
# Read the gene-by-pathway 0/1 table shipped with the project.
# NOTE(review): read.csv() defaults to check.names = TRUE, so header characters
# that are not valid in R names are replaced by "." and an empty first header
# becomes "X" (the preview shows names such as IRIS_Bcell.Memory_IgG_IgA).
# Confirm that pathway names altered this way are acceptable downstream.
plier_pathways_file <- NULL
plier_pathways <- read.csv(file = here("data", "plier-pathways.csv"))

# Show the first rows as a quick sanity check.
head(plier_pathways, n = 6L)

Unnamed: 0_level_0,X,IRIS_Bcell.Memory_IgG_IgA,IRIS_Bcell.Memory_IgM,IRIS_Bcell.naive,IRIS_CD4Tcell.N0,IRIS_CD4Tcell.Th1.restimulated12hour,IRIS_CD4Tcell.Th1.restimulated48hour,IRIS_CD4Tcell.Th2.restimulated12hour,IRIS_CD4Tcell.Th2.restimulated48hour,IRIS_CD8Tcell.N0,⋯,KEGG_GNRH_SIGNALING_PATHWAY,KEGG_BASAL_TRANSCRIPTION_FACTORS,REACTOME_SYNTHESIS_OF_DNA,KEGG_HEMATOPOIETIC_CELL_LINEAGE,KEGG_T_CELL_RECEPTOR_SIGNALING_PATHWAY,PID_IL4_2PATHWAY,REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR,PID_BCR_5PATHWAY,PID_TELOMERASEPATHWAY,PID_PI3KPLCTRKPATHWAY
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,GAS6,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
2,MMP14,0,0,0,0,0,0,0,0,0,⋯,1,0,0,0,0,0,0,0,0,0
3,MARCKSL1,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
4,SPARC,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
5,CTSD,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
6,EPAS1,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0


In [11]:
# 0-1 matrix to gmtList

# Turn a gene-by-pathway indicator table into a named list with one character
# vector of gene symbols per pathway column. The gene column is "X" when such
# a column exists, otherwise the first column of the table.
binary_table_to_gene_sets <- function(pathway_df) {
  symbol_col <- if ("X" %in% names(pathway_df)) "X" else names(pathway_df)[1]

  # Normalise the symbols: strip surrounding whitespace and upper-case them.
  # NOTE(review): upper-casing alters mixed-case symbols (e.g. "C1orf112");
  # confirm gtex_genes follows the same convention, otherwise such genes
  # cannot match when the prior is intersected with the data.
  symbols <- toupper(trimws(pathway_df[[symbol_col]]))

  membership <- as.matrix(pathway_df[setdiff(names(pathway_df), symbol_col)])
  mode(membership) <- "numeric"

  # Binarise in one step: NA becomes 0, every other non-zero value becomes 1.
  membership[] <- as.numeric(!is.na(membership) & membership != 0)

  # Collapse rows that share a symbol, keeping first-appearance order; a
  # positive sum means the gene is a member in at least one of its rows.
  membership <- rowsum(membership, symbols, reorder = FALSE)
  is_member <- membership != 0

  gene_sets <- lapply(
    seq_len(ncol(is_member)),
    function(j) rownames(is_member)[is_member[, j]]
  )
  setNames(gene_sets, colnames(is_member))
}

plier_sets <- binary_table_to_gene_sets(plier_pathways)

# gmtList with a single collection named "PLIER".
plier_gmtList <- list(PLIER = plier_sets)

# Build the prior matrices with the PLIER2 helpers: gene-set list to sparse
# matrix, restriction to the genes shared with the data, then getChat() on
# the matched matrix (its result is passed as `Chat` to PLIERfull()).
gtex_pathMat <- gmtListToSparseMat(plier_gmtList)
gtex_matched <- getMatchedPathwayMat(gtex_pathMat, gtex_genes)
gtex_chatObj <- getChat(gtex_matched)

There are 6439 genes in the intersection between data and prior

Removing 4 pathways

Inverting...

done



# PLIERfull

In [12]:
# Fit the full PLIER2 model with the pathway prior, starting from the base
# model fitted above and reusing the stored SVD result.
#
# PLIERfull() warns that its `seed` argument is deprecated and ignored and asks
# for set.seed() to be called beforehand. Fix the RNG state here so that the
# run (presumably the cross-validation enabled by doCrossval = TRUE) is
# reproducible between executions.
set.seed(123)

gtex_fullRes <- PLIERfull(
  Y                 = gtex_fbm_filt,
  priorMat          = as.matrix(gtex_matched),
  svdres            = gtex_svdRes,
  plier.base.result = gtex_baseRes,
  Chat              = gtex_chatObj,
  k                 = PLIER_K_gtex,
  doCrossval        = TRUE,
  trace             = TRUE,  # print per-iteration progress
  max.U.updates     = config$GTEx$MAX_U_UPDATES,
  ncores            = 4
)

**PLIER v2 **

“`seed` is deprecated and ignored. Use set.seed(seed) before calling this function.”
using provided PLIERbase result

L1=34.7581967384243; L2=104.274590215273

Progress 1 / 350 | Bdiff=0.000468

Progress 2 / 350 | Bdiff=0.000448

, Number of annotated columns is 373

Progress 3 / 350 | Bdiff=0.000439

Progress 4 / 350 | Bdiff=0.000414

, Number of annotated columns is 383

Progress 5 / 350 | Bdiff=0.000380

Progress 6 / 350 | Bdiff=0.000353

, Number of annotated columns is 390

Progress 7 / 350 | Bdiff=0.000328

Progress 8 / 350 | Bdiff=0.000313

, Number of annotated columns is 396

Progress 9 / 350 | Bdiff=0.000304

Progress 10 / 350 | Bdiff=0.000290

, Number of annotated columns is 400

Progress 11 / 350 | Bdiff=0.000281

Progress 12 / 350 | Bdiff=0.000261

, Number of annotated columns is 365

Progress 13 / 350 | Bdiff=0.000241

Progress 14 / 350 | Bdiff=0.000235

, Number of annotated columns is 370

Progress 15 / 350 | Bdiff=0.000226

Progress 16 / 350 | Bdiff=0.

In [13]:
# Label the fitted matrices: B columns get the sample IDs, Z columns get
# sequential "LV<n>" names.
colnames(gtex_fullRes$B) <- samples
colnames(gtex_fullRes$Z) <- paste0("LV", seq_len(ncol(gtex_fullRes$Z)))

# Use the same "LV<n>" labels in the summary table.
# NOTE: this step is not idempotent — once `LV index` has been renamed,
# running the cell again fails because that column no longer exists.
gtex_fullRes$summary <- gtex_fullRes$summary %>%
  dplyr::rename(LV = "LV index") %>%
  dplyr::mutate(LV = paste0("LV", .data$LV))

# Persist the labelled model.
saveRDS(
  object = gtex_fullRes,
  file = file.path(output_data_dir, "gtex_prior_PLIER_PLIER2.rds")
)

In [16]:
# Preview the first rows of Z (genes x LVs) and B (LVs x samples), then
# report the dimensions of both matrices.
head(gtex_fullRes[["Z"]], n = 6L)
head(gtex_fullRes[["B"]], n = 6L)
dim(gtex_fullRes[["Z"]])
dim(gtex_fullRes[["B"]])

Unnamed: 0,LV1,LV2,LV3,LV4,LV5,LV6,LV7,LV8,LV9,LV10,⋯,LV773,LV774,LV775,LV776,LV777,LV778,LV779,LV780,LV781,LV782
WASH7P,0.0,0,0.0,0,0,0.0,0,0,0,0,⋯,0.0,0,0,0.0,0,0,0,0.0,0,0
RP11-34P13.15,0.0,0,0.1906793,0,0,0.2747268,0,0,0,0,⋯,0.0,0,0,0.0,0,0,0,0.0,0,0
RP11-34P13.16,0.0,0,0.0,0,0,0.2838988,0,0,0,0,⋯,0.0,0,0,0.0,0,0,0,0.0,0,0
RP11-34P13.18,0.2059246,0,0.0,0,0,0.0,0,0,0,0,⋯,0.0,0,0,0.1560272,0,0,0,0.0,0,0
AP006222.2,0.0,0,0.0,0,0,0.0,0,0,0,0,⋯,0.780123,0,0,0.0,0,0,0,0.0,0,0
MTND1P23,0.0,0,0.0,0,0,0.0,0,0,0,0,⋯,0.0,0,0,0.0,0,0,0,0.3017083,0,0


Unnamed: 0,GTEX-1117F-0226-SM-5GZZ7,GTEX-1117F-0426-SM-5EGHI,GTEX-1117F-0526-SM-5EGHJ,GTEX-1117F-0626-SM-5N9CS,GTEX-1117F-0726-SM-5GIEN,GTEX-1117F-1326-SM-5EGHH,GTEX-1117F-2426-SM-5EGGH,GTEX-1117F-2526-SM-5GZY6,GTEX-1117F-2826-SM-5GZXL,GTEX-1117F-2926-SM-5GZYI,⋯,GTEX-ZZPU-1126-SM-5N9CW,GTEX-ZZPU-1226-SM-5N9CK,GTEX-ZZPU-1326-SM-5GZWS,GTEX-ZZPU-1426-SM-5GZZ6,GTEX-ZZPU-1826-SM-5E43L,GTEX-ZZPU-2126-SM-5EGIU,GTEX-ZZPU-2226-SM-5EGIV,GTEX-ZZPU-2426-SM-5E44I,GTEX-ZZPU-2626-SM-5E45Y,GTEX-ZZPU-2726-SM-5NQ8O
LV1,0.3134514,0.063208122,0.4228512,0.15561323,-0.3525828,-0.143900075,0.30477228,0.24495665,-0.01102135,0.07544365,⋯,-0.26708161,0.07252696,0.158034861,-0.12853128,0.1046009,0.39972946,0.006132236,0.22932814,-0.3737471,-0.112149135
LV2,-0.18114592,-0.185369357,-0.2259942,-0.19106239,-0.15302675,-0.17498583,-0.12453004,-0.19063816,-0.19414603,-0.1855091,⋯,-0.15445932,-0.13113603,-0.153376489,-0.15998244,-0.2107107,-0.21825497,-0.202212634,-0.25608172,-0.1646813,-0.23531472
LV3,-0.03294719,-0.296786839,-0.1775444,-0.11640383,-0.24122365,-0.172586268,-0.12629879,1.66548826,-0.04455627,0.58312032,⋯,-0.20797872,-0.10418558,0.058206812,-0.07251133,-0.2403972,-0.11209947,1.708709713,-0.2593691,-0.3504266,-0.161385386
LV4,0.93920504,-0.225546313,0.5764434,0.59213022,0.25379341,0.41810391,0.51919188,-0.66745738,0.45936656,-0.06858499,⋯,0.25397032,0.05345132,0.145339696,-0.06870691,0.391718,0.48867692,-1.11955965,0.57634255,-0.2654301,0.58515063
LV5,0.0113791,0.294332288,-0.1329019,-0.0775871,-0.01799054,0.007615462,-0.18125791,-0.08299947,0.02124745,-0.05476285,⋯,0.07842289,0.0258716,-0.082741155,-0.02108908,-0.1390655,-0.17389431,0.042709882,-0.21595706,1.1071127,0.037748535
LV6,0.09469034,-0.004584202,0.1029553,0.01241803,-0.12123522,-0.079666461,0.05149123,-0.01705806,-0.03661375,-0.06510471,⋯,-0.12716895,-0.04930054,0.004743758,-0.13751762,-0.0857733,0.02864907,-0.121971297,-0.03360678,-0.1279173,-0.004516179


In [15]:
# Keep the pathway-LV rows of the summary with FDR below 0.05 and AUC above 0.7.
gtex_fullRes$summary %>%
  dplyr::filter(FDR < 0.05, AUC > 0.7)

pathway,LV,AUC,p-value,FDR
<chr>,<chr>,<dbl>,<dbl>,<dbl>
REACTOME_TRANSCRIPTION,LV1,0.7245583,1.902492e-06,2.806646e-05
REACTOME_TELOMERE_MAINTENANCE,LV1,0.7335667,3.584173e-03,1.750957e-02
REACTOME_RNA_POL_I_PROMOTER_OPENING,LV3,0.8584963,1.630193e-03,9.270944e-03
REACTOME_GLYCOSAMINOGLYCAN_METABOLISM,LV4,0.7500991,2.962180e-04,2.284939e-03
REACTOME_EXTRACELLULAR_MATRIX_ORGANIZATION,LV4,0.7423569,3.281706e-03,1.635365e-02
KEGG_PROTEASOME,LV5,0.9617851,4.389542e-07,8.175521e-06
MIPS_PA700_20S_PA28_COMPLEX,LV5,0.8355090,4.158289e-04,3.052143e-03
REACTOME_MITOCHONDRIAL_PROTEIN_IMPORT,LV8,0.9236095,1.475435e-06,2.243263e-05
KEGG_OXIDATIVE_PHOSPHORYLATION,LV8,0.8644211,1.033789e-09,3.582199e-08
REACTOME_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_TRANSPORT,LV8,0.9969594,5.183099e-18,8.580908e-16
