# GTEx model with different priors

## Load libraries

In [1]:
# Install PLIER (wgmao/PLIER) from GitHub if it is not already installed.
if (!requireNamespace("PLIER", quietly = TRUE)) {
    devtools::install_github("wgmao/PLIER")
}

# Install PLIER2 (mchikina/PLIER2) from a local checkout if it is not already
# installed. Point the PLIER2_REPO_PATH environment variable at your clone
# instead of editing this cell; the previous hard-coded location is kept as
# the fallback so existing setups behave exactly as before.
if (!requireNamespace("PLIER2", quietly = TRUE)) {
    REPO_PATH <- Sys.getenv(
        "PLIER2_REPO_PATH",
        unset = "/home/msubirana/Documents/pivlab/PLIER2"
    )
    # Fail early with an actionable message rather than deep inside remotes.
    if (!file.exists(REPO_PATH)) {
        stop(
            "PLIER2 source not found at '", REPO_PATH,
            "'. Set the PLIER2_REPO_PATH environment variable to your checkout.",
            call. = FALSE
        )
    }
    # force = TRUE reinstalls even if remotes believes the package is current;
    # dependencies = FALSE leaves the already-installed dependencies untouched.
    remotes::install_local(REPO_PATH, force = TRUE, dependencies = FALSE)
}

library(bigstatsr)
library(data.table)
library(dplyr)
library(rsvd)
library(glmnet)
library(Matrix)
library(knitr)
library(here)
library(PLIER2)
library(clusterProfiler)
library(org.Hs.eg.db)
library(dplyr)


source(here("config.R"))


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix

Loaded glmnet 4.1-10

here() starts at /home/msubirana/Documents/pivlab/plier2-analyses



clusterProfiler v4.10.0  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/

If you use clusterProfiler in published research, please cite:
T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141


Attaching package: ‘clusterProfiler’


The following object is masked from ‘package:stats’:

    filter


Loading required package: AnnotationDbi

Loading required package: stats4

Loading required package: BiocGener

## Output directory

In [2]:
# Folder holding the GTEx artifacts read by this notebook. It is created
# (including missing parents) if absent; showWarnings = FALSE keeps re-runs
# silent when the folder already exists.
output_data_dir <- config$GTEx$DATASET_FOLDER
dir.create(path = output_data_dir, recursive = TRUE, showWarnings = FALSE)

In [3]:
# Remove the row cap on tables rendered in the notebook.
options(repr.matrix.max.rows = Inf)

# PLIER2 result stored under the KEGG_2021_Human file name; its `summary`
# element is used in the next cell.
gtex_plier2_kegg <- readRDS(
  file = file.path(output_data_dir, "gtex_KEGG_2021_Human_PLIER2.rds")
)

In [4]:
# One row per LV listing the pathways that pass both thresholds
# (FDR < 0.05 and AUC > 0.7), joined into a single comma-separated string.
# Rows are ordered by the numeric part of the LV label so that, e.g., LV2
# sorts before LV10.
kegg_plier2_summary <- gtex_plier2_kegg$summary %>%
  dplyr::filter(FDR < 0.05, AUC > 0.7) %>%
  dplyr::group_by(LV) %>%
  dplyr::summarise(
    pathway = paste(pathway, collapse = ", "),
    .groups = "drop"
  ) %>%
  dplyr::arrange(as.numeric(gsub("[^0-9]", "", LV)))

In [5]:
# Base PLIER2 result; its `Z` matrix is labelled and analysed in later cells.
gtex_baseRes <- readRDS(
  file = file.path(output_data_dir, "gtex_PLIER2_baseRes.rds")
)

In [6]:
# Gene identifiers, used below as the row names of `gtex_baseRes$Z`.
gtex_genes <- readRDS(
  file = file.path(output_data_dir, "gtex_genes.rds")
)

In [7]:
# Label the Z matrix: columns become LV1..LVk (k = number of columns) and
# rows take the gene identifiers loaded above.
colnames(gtex_baseRes$Z) <- paste0("LV", seq_len(ncol(gtex_baseRes$Z)))
rownames(gtex_baseRes$Z) <- gtex_genes

In [8]:
# Preview the first six rows of the labelled Z matrix.
head(gtex_baseRes$Z, n = 6L)

Unnamed: 0,LV1,LV2,LV3,LV4,LV5,LV6,LV7,LV8,LV9,LV10,⋯,LV197,LV198,LV199,LV200,LV201,LV202,LV203,LV204,LV205,LV206
WASH7P,0,0,0,0,0,0.0,0,0,0,0,⋯,0,0,0,0,0.0,0,0,0.1239626,0.0,0.0
RP11-34P13.15,0,0,0,0,0,0.3678263,0,0,0,0,⋯,0,0,0,0,0.3107976,0,0,0.0,0.0,0.0
RP11-34P13.16,0,0,0,0,0,0.3559246,0,0,0,0,⋯,0,0,0,0,0.3337613,0,0,0.0,0.0,0.0
RP11-34P13.18,0,0,0,0,0,0.0,0,0,0,0,⋯,0,0,0,0,0.2021179,0,0,0.2418167,0.0,0.0
AP006222.2,0,0,0,0,0,0.0,0,0,0,0,⋯,0,0,0,0,0.3997058,0,0,0.0,0.1570023,0.0
MTND1P23,0,0,0,0,0,0.0,0,0,0,0,⋯,0,0,0,0,0.4758745,0,0,0.0,0.0,0.4125261


In [9]:
# Data-frame copy of Z so that it can be used with dplyr verbs.
# check.names = TRUE is the data.frame() default, spelled out here.
kegg_plier2_Z <- data.frame(gtex_baseRes$Z, check.names = TRUE)

In [10]:
# Work on the labelled Z matrix under a shorter name.
Z <- gtex_baseRes$Z

# Translate the row names of Z (treated as gene symbols) to Entrez IDs.
# bitr() can return several IDs per symbol; only the first row for each
# symbol is kept.
map <- bitr(
  rownames(Z),
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Hs.eg.db
)
map <- map[!duplicated(map[["SYMBOL"]]), , drop = FALSE]

lv_names <- colnames(Z)

# Named list with one (initially NULL) slot per LV; filled in a later cell.
top_syms <- vector("list", length(lv_names))
names(top_syms) <- lv_names

'select()' returned 1:many mapping between keys and columns

“20.4% of input gene IDs are fail to map...”


In [11]:
# Inspect the LV1 slot of the pre-allocated list (NULL until it is filled).
head(top_syms[["LV1"]])

NULL

In [12]:
# Sort genes by LV1 value in descending order and show the last ten rows,
# i.e. the ten smallest values.
# NOTE(review): if the strongest LV1 genes were the intent, this should be
# head() rather than tail() — confirm.
kegg_plier2_Z %>%
  dplyr::select(LV1) %>%
  dplyr::arrange(dplyr::desc(LV1)) %>%
  tail(n = 10L)

Unnamed: 0_level_0,LV1
Unnamed: 0_level_1,<dbl>
MT-ND4,0
MT-TH,0
MT-TS2,0
MT-TL2,0
MT-ND5,0
MT-ND6,0
MT-TE,0
MT-CYB,0
MT-TT,0
MT-TP,0


In [13]:
# Disabled earlier variant, kept for reference. It ranked, for every LV, ALL
# genes with a non-missing, strictly positive value in Z (no top-percentage
# cap), then mapped them to Entrez IDs and ran enrichKEGG exactly like the
# active cell below.
# for (lv in lv_names) {
#   v <- Z[, lv]
#   idx <- which(!is.na(v) & v > 0)
#   if (length(idx) == 0L) { top_syms[[lv]] <- character(0); next }
#   ord <- idx[order(v[idx], decreasing = TRUE)]
#   top_syms[[lv]] <- rownames(Z)[ord]
# }

# top_entrez <- lapply(top_syms, function(syms) {
#   ids <- map$ENTREZID[match(syms, map$SYMBOL)]
#   unique(as.character(ids[!is.na(ids)]))
# })

# kegg_list <- lapply(top_entrez, function(ids) {
#   if (!length(ids)) return(NULL)
#   enrichKEGG(gene = ids, organism = "hsa", qvalueCutoff = 0.05)
# })

In [14]:
# Share of all (non-missing) genes that may be selected as "top" for one LV.
top_fraction <- 0.05

# For each LV keep its highest-valued genes: at most the top 5% of all genes,
# and only genes whose value in Z is strictly positive.
# Z contains many exact zeros, so without the positivity filter an LV with
# fewer positive genes than the 5% cut-off would be padded with zero-valued
# genes (ties, hence taken in row order) that carry no signal for that LV but
# would still be fed into the enrichment test.
for (lv in lv_names) {
  v <- Z[, lv]
  # na.last = NA removes missing values from the ranking altogether.
  ord <- order(v, decreasing = TRUE, na.last = NA)
  # Always allow at least one gene, even for very small matrices.
  n_top <- max(1L, ceiling(length(ord) * top_fraction))
  ord <- head(ord, n_top)
  # An LV without any positive value yields character(0), not an NA symbol.
  top_syms[[lv]] <- rownames(Z)[ord[v[ord] > 0]]
}

# Convert each LV's gene symbols to Entrez IDs through the symbol map:
# symbols without a mapping are dropped and repeated IDs are collapsed.
top_entrez <- lapply(top_syms, function(syms) {
  map_rows <- match(syms, map$SYMBOL)
  entrez <- as.character(map$ENTREZID[map_rows])
  unique(entrez[!is.na(entrez)])
})

# Run a KEGG over-representation test (human, "hsa") on each LV's Entrez IDs.
# LVs with no mapped gene get NULL instead of an enrichment result.
# NOTE(review): no `universe` is passed, so enrichKEGG uses its default
# background — confirm this is intended rather than the genes present in Z.
kegg_list <- lapply(top_entrez, function(entrez_ids) {
  if (length(entrez_ids) == 0L) {
    NULL
  } else {
    enrichKEGG(gene = entrez_ids, organism = "hsa", qvalueCutoff = 0.05)
  }
})

Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...

Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...



In [15]:
# enrichKEGG pathway descriptions for LV15 with adjusted p-value < 0.05.
kegg_list[["LV15"]]@result %>%
  dplyr::filter(p.adjust < 0.05) %>%
  dplyr::pull(Description)

In [16]:
# Pathways the PLIER2 summary associates with LV15, for comparison.
kegg_plier2_summary %>%
  dplyr::filter(LV == "LV15")

LV,pathway
<chr>,<chr>
LV15,"Drug metabolism, Glycine, serine and threonine metabolism, Peroxisome, Retinol metabolism, Tyrosine metabolism"


In [17]:
# enrichKEGG pathway descriptions for LV102 with adjusted p-value < 0.05.
kegg_list[["LV102"]]@result %>%
  dplyr::filter(p.adjust < 0.05) %>%
  dplyr::pull(Description)

In [18]:
# Pathways the PLIER2 summary associates with LV102, for comparison.
kegg_plier2_summary %>%
  dplyr::filter(LV == "LV102")

LV,pathway
<chr>,<chr>
LV102,"Complement and coagulation cascades, Systemic lupus erythematosus"


In [19]:
# enrichKEGG pathway descriptions for LV12 with adjusted p-value < 0.05.
kegg_list[["LV12"]]@result %>%
  dplyr::filter(p.adjust < 0.05) %>%
  dplyr::pull(Description)

In [20]:
# Pathways the PLIER2 summary associates with LV12, for comparison.
kegg_plier2_summary %>%
  dplyr::filter(LV == "LV12")

LV,pathway
<chr>,<chr>
LV12,"Natural killer cell mediated cytotoxicity, Neutrophil extracellular trap formation, Osteoclast differentiation"


In [21]:
# enrichKEGG pathway descriptions for LV117 with adjusted p-value < 0.05.
kegg_list[["LV117"]]@result %>%
  dplyr::filter(p.adjust < 0.05) %>%
  dplyr::pull(Description)

In [22]:
# Pathways the PLIER2 summary associates with LV117, for comparison.
kegg_plier2_summary %>%
  dplyr::filter(LV == "LV117")

LV,pathway
<chr>,<chr>


In [23]:
# LV23: enrichKEGG pathway descriptions with adjusted p-value < 0.05 ...
kegg_list[["LV23"]]@result %>%
  dplyr::filter(p.adjust < 0.05) %>%
  dplyr::pull(Description)

# ... followed by the pathways the PLIER2 summary associates with the same LV.
kegg_plier2_summary %>%
  dplyr::filter(LV == "LV23")

LV,pathway
<chr>,<chr>
LV23,"Legionellosis, Pancreatic secretion, Ribosome"


In [24]:
# LV44: enrichKEGG pathway descriptions with adjusted p-value < 0.05 ...
kegg_list[["LV44"]]@result %>%
  dplyr::filter(p.adjust < 0.05) %>%
  dplyr::pull(Description)

# ... followed by the pathways the PLIER2 summary associates with the same LV.
kegg_plier2_summary %>%
  dplyr::filter(LV == "LV44")

LV,pathway
<chr>,<chr>
LV44,"Alanine, aspartate and glutamate metabolism, Alcoholism, GABAergic synapse"
