# ARCHS4 model building with PLIER2

## Load libraries

In [8]:
# Install PLIER from GitHub if it is not already available.
if (!requireNamespace("PLIER", quietly = TRUE)) {
  devtools::install_github("wgmao/PLIER")
}

# Install PLIER2 from a local checkout if it is not already available.
# The checkout location can be overridden with the PLIER2_REPO_PATH
# environment variable; when it is unset, the original default path is used.
if (!requireNamespace("PLIER2", quietly = TRUE)) {
  REPO_PATH <- Sys.getenv(
    "PLIER2_REPO_PATH",
    unset = "/home/msubirana/Documents/pivlab/PLIER2"
  )
  # Fail early with an actionable message instead of a confusing install error.
  if (!file.exists(REPO_PATH)) {
    stop(
      "PLIER2 source not found at '", REPO_PATH, "'. ",
      "Set the PLIER2_REPO_PATH environment variable to a local PLIER2 ",
      "checkout.",
      call. = FALSE
    )
  }
  # force = TRUE reinstalls even if unchanged; dependencies are not installed.
  remotes::install_local(REPO_PATH, force = TRUE, dependencies = FALSE)
}

library(bigstatsr)
library(data.table)
library(dplyr)
library(rsvd)
library(glmnet)
library(Matrix)
library(knitr)
library(here)
library(PLIER)
library(PLIER2)
library(hdf5r)
library(biomaRt)

# Evaluate the project-level config.R, located relative to the project root.
# NOTE(review): presumably this defines the `config` list used below - confirm.
source(file = here::here("config.R"))

# Fix the RNG seed so that randomized steps are reproducible across runs.
set.seed(seed = 123)

## Output directory

In [9]:
# Folder where the ARCHS4 inputs and outputs of this notebook live.
output_dir <- config$ARCHS4$DATASET_FOLDER

# Create the folder (and any missing parents); stay silent if it exists.
dir.create(path = output_dir, recursive = TRUE, showWarnings = FALSE)

In [10]:
# Load the serialized, filtered ARCHS4 matrix from the output folder.
# NOTE(review): the name suggests a bigstatsr FBM; if so, its backing file
# must still be present on disk - confirm.
archs4_fbm_filt <- readRDS(
  file = file.path(output_dir, "archs4_fbm_filt.rds")
)

## SVD computation and SVD K estimation

In [11]:
# Matrix dimensions: rows and columns of the filtered matrix.
# NOTE(review): the names suggest rows are genes and columns are samples.
g_fb <- nrow(archs4_fbm_filt)
samples_fb <- ncol(archs4_fbm_filt)

# Number of SVD components: one less than the smaller matrix dimension.
SVD_K_archs4 <- min(c(g_fb, samples_fb)) - 1

message(paste0("Using SVD K = ", SVD_K_archs4))

Using SVD K = 4999



In [12]:
# Number of cores for the randomized SVD, taken from the project config.
N_CORES <- config$ARCHS4$PLIER_PARAMS$RANDOM_SVD_N_CORES

# Validate early: a missing or malformed config entry would otherwise surface
# as an opaque "argument is of length zero" error in the `if` below.
if (!is.numeric(N_CORES) || length(N_CORES) != 1L ||
      is.na(N_CORES) || N_CORES < 1) {
  stop(
    "config$ARCHS4$PLIER_PARAMS$RANDOM_SVD_N_CORES must be a single ",
    "number >= 1",
    call. = FALSE
  )
}

if (N_CORES > 1) {
  # When parallelizing over cores, switch off bigstatsr's parallel-BLAS check
  # and clear the default BLAS core count. options() returns the previous
  # values, which are kept so the exact prior state can be restored afterwards.
  blas_nproc <- getOption("default.nproc.blas")
  old_opts <- options(
    bigstatsr.check.parallel.blas = FALSE,
    default.nproc.blas = NULL
  )
}

# Randomized partial SVD of the filtered matrix with SVD_K_archs4 components.
# `finally` guarantees the options are restored even if the SVD fails.
archs4_svdRes <- tryCatch(
  big_randomSVD(
    archs4_fbm_filt,
    k = SVD_K_archs4,
    ncores = N_CORES
  ),
  finally = {
    if (N_CORES > 1) {
      # Restore the values in effect before this cell (not a hard-coded TRUE).
      options(old_opts)
    }
  }
)

In [13]:
# Keep only the SVD components whose singular value is finite: entries of d
# that are NA/NaN/Inf are dropped together with the matching columns of u and v.

# Number of singular values before filtering.
n_before <- length(archs4_svdRes[["d"]])
# Integer positions of the finite singular values.
valid_idx <- which(is.finite(archs4_svdRes[["d"]]))

archs4_svdRes[["d"]] <- archs4_svdRes[["d"]][valid_idx]
# drop = FALSE keeps u and v as matrices even when a single column remains.
archs4_svdRes[["u"]] <- archs4_svdRes[["u"]][, valid_idx, drop = FALSE]
archs4_svdRes[["v"]] <- archs4_svdRes[["v"]][, valid_idx, drop = FALSE]

In [14]:
# Persist the filtered SVD result next to the other ARCHS4 artifacts.
saveRDS(
  object = archs4_svdRes,
  file = file.path(output_dir, "archs4_svdRes.rds")
)