# GTEx model building with PLIER2 and PLIER

# PLIER2

## Load libraries

In [1]:
library(data.table)
library(here)

here() starts at /home/msubirana/Documents/pivlab/plier2-analyses



## Output directory

In [2]:
# Run the project configuration script; the lines below read `config`, which
# is presumably defined there (confirm against config.R).
source(file = here("config.R"))

# Make sure the GTEx dataset folder exists, creating parent folders as needed.
output_data_dir <- config$GTEx$DATASET_FOLDER
if (!dir.exists(output_data_dir)) {
  dir.create(path = output_data_dir, recursive = TRUE, showWarnings = FALSE)
}

# Settings

## Download GTEx gene TPM data

In [3]:
# Fetch the GTEx archive named by config$GTEx$URL into the dataset folder,
# unless a file with that name is already present.
url <- config$GTEx$URL
dest_dir <- config$GTEx$DATASET_FOLDER
dest_gz <- file.path(dest_dir, basename(url))

if (!file.exists(dest_gz)) {
  dir.create(dest_dir, recursive = TRUE, showWarnings = FALSE)

  # Download under a temporary name and rename only on success, so an
  # interrupted transfer can never be mistaken for a finished file by the
  # file.exists() check above on the next run.
  partial_gz <- paste0(dest_gz, ".part")

  # download.file() obeys getOption("timeout") (60 seconds by default), which
  # is too short for a large archive. Raise it for this call only and restore
  # the previous value afterwards, even if the download errors.
  old_opts <- options(timeout = max(3600, getOption("timeout", 60)))
  status <- tryCatch(
    download.file(url, partial_gz, mode = "wb"),
    finally = options(old_opts)
  )

  # download.file() returns 0 on success; anything else is a failed transfer.
  if (!identical(as.integer(status), 0L) ||
        !file.rename(partial_gz, dest_gz)) {
    unlink(partial_gz)
    stop("Download failed for: ", url, call. = FALSE)
  }
  message("Downloaded to: ", dest_gz)
} else {
  message("File already exists, skipping download.")
}

File already exists, skipping download.



## Preprocess GTEx data

In [4]:
# Convert the GTEx gene TPM table (.gct.gz) to a cached RDS file, then sum
# the numeric columns over rows that share a `Description` value and expose
# the result as a plain matrix (`data_mat`) plus its row/column labels.
exprs_path <- file.path(
  config$GTEx$DATASET_FOLDER,
  "GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_tpm.gct.gz"
)
output_file <- config$GTEx$DATASET_FILE

if (!file.exists(output_file)) {
  # Fail early with a clear message instead of a low-level connection error.
  if (!file.exists(exprs_path)) {
    stop("Expression file not found: ", exprs_path, call. = FALSE)
  }
  dir.create(dirname(output_file), recursive = TRUE, showWarnings = FALSE)
  # skip = 2 jumps over the two lines that precede the column header;
  # check.names = FALSE keeps the column names exactly as written in the file.
  exprs_data <- read.table(
    exprs_path,
    header = TRUE,
    sep = "\t",
    skip = 2,
    check.names = FALSE
  )
  saveRDS(exprs_data, output_file)
  message("File successfully written to: ", output_file)
} else {
  message("Output file already exists. Skipping.")
}

# Read back the very path that was checked/written above. Resolving it a
# second time through here() could point at a different file when the
# configured path is relative and the working directory is not the project
# root.
gtex <- as.data.table(readRDS(output_file))

# Sum every numeric column within each `Description` group. This builds a new
# table (one row per distinct Description); non-numeric columns other than
# the grouping column are dropped by `.SDcols = is.numeric`.
aggregated_gtex <- gtex[
  ,
  lapply(.SD, sum),
  by = Description,
  .SDcols = is.numeric
]

# Column 1 of the aggregated table is the grouping column; the rest are the
# summed numeric columns.
genes <- aggregated_gtex$Description
samples <- names(aggregated_gtex)[-1]
data_mat <- as.matrix(aggregated_gtex[, -1])

Output file already exists. Skipping.



In [None]:
library(NMF)

# Factorise the aggregated expression matrix with NMF.
# Reads `data_mat` (numeric matrix) and `genes` (one label per row of
# `data_mat`), both created by the preprocessing step.

# Work on a double-precision matrix.
X <- as.matrix(data_mat)
storage.mode(X) <- "double"

# `genes` must label the rows of X one-to-one for the bookkeeping below.
stopifnot(length(genes) == nrow(X))

# Drop rows that are all NA or sum to 0 (NA-safe)
row_ok <- rowSums(!is.na(X)) > 0 & rowSums(X, na.rm = TRUE) > 0
X <- X[row_ok, , drop = FALSE]

# Keep the gene labels in step with the filtered rows; without this, `genes`
# still has one entry per unfiltered row and no longer lines up with X or W.
genes_kept <- genes[row_ok]

cat("Kept", nrow(X), "genes out of", nrow(data_mat), "\n")

# Run NMF (k = 412) with multiplicative updates
# (.opt = "v" turns on verbose progress output; nrun = 1 does a single fit)
set.seed(42)
k <- 412
fit <- nmf(X, rank = k, method = "brunet", nrun = 1, .opt = "v")

W <- basis(fit)  # genes x k (on the filtered set)
H <- coef(fit)   # k x samples

# Rows of W follow the row order of X, so label them with the kept genes.
rownames(W) <- genes_kept

Loading required package: pkgmaker

Loading required package: registry

Loading required package: rngtools

Loading required package: cluster



NMF - BioConductor layer [OK] | Shared memory capabilities [NO: bigmemory] | Cores 31/32

  To enable shared memory capabilities, try: install.extras('
NMF
')



Kept 54350 genes out of 54592 


NMF algorithm: 'brunet'

NMF seeding method: random

