# ARCHS4 model building with CLAMP

💡 **Environment:** `clamp-analyses`  

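This notebook computes a partial Singular Value Decomposition (SVD) of the filtered ARCHS4 gene expression matrix. The goal is to reduce dimensionality by extracting the leading components (principal components) that capture the most variance in the data. Components with non-finite singular values are removed, and the result is saved to the dataset folder.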

## Load libraries

In [None]:
# Install the GitHub-hosted dependencies only when they are not already
# available. `remotes` is used for both installs so that `devtools` is not
# required just for this cell.
if (!requireNamespace("PLIER", quietly = TRUE)) {
  remotes::install_github("wgmao/PLIER")
}

if (!requireNamespace("CLAMP", quietly = TRUE)) {
  # Location of the local CLAMP checkout. Set the CLAMP_REPO_PATH environment
  # variable to point somewhere else; the historical path is kept as default.
  REPO_PATH <- Sys.getenv(
    "CLAMP_REPO_PATH",
    unset = "/home/msubirana/Documents/pivlab/CLAMP"
  )
  if (!file.exists(REPO_PATH)) {
    stop(
      "CLAMP sources not found at '", REPO_PATH, "'. ",
      "Set the CLAMP_REPO_PATH environment variable to a local copy.",
      call. = FALSE
    )
  }
  remotes::install_local(REPO_PATH, force = TRUE, dependencies = FALSE)
}

library(here)

# The `config` object used below is presumably defined by config.R, which is
# resolved relative to the project root found by `here()`.
source(here("config.R"))

# Fix the RNG state with the seed configured for the ARCHS4 SVD.
set.seed(config$ARCHS4$RANDOM_SVD_SEED)

here() starts at /home/msubirana/Documents/pivlab/clamp-analyses



## Output directory

In [2]:
# Folder used for the ARCHS4 dataset files; create it (including any missing
# parent folders) when it is not there yet.
output_dir <- config$ARCHS4$DATASET_FOLDER
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
}

In [3]:
# Load the object stored as archs4_fbm_filt.rds in the dataset folder.
fbm_rds_path <- file.path(output_dir, "archs4_fbm_filt.rds")
archs4_fbm_filt <- readRDS(fbm_rds_path)

## SVD computation and SVD K estimation

In [4]:
# Number of components to request: one less than the smaller matrix dimension.
fbm_dims <- dim(archs4_fbm_filt)
g_fb <- fbm_dims[1L]
samples_fb <- fbm_dims[2L]
SVD_K_archs4 <- min(fbm_dims) - 1

message("Using SVD K = ", SVD_K_archs4)

Using SVD K = 699



In [6]:
# Number of cores for the SVD, taken from the project configuration.
N_CORES <- config$ARCHS4$CLAMP_PARAMS$RANDOM_SVD_N_CORES
stopifnot(length(N_CORES) == 1L, !is.na(N_CORES))

parallel_svd <- N_CORES > 1

if (parallel_svd) {
  # if we are parallelizing, then disable BLAS parallelization
  blas_nproc <- getOption("default.nproc.blas")
  # options() returns the values that were in effect before the change, so
  # the exact previous state can be put back afterwards.
  saved_opts <- options(
    bigstatsr.check.parallel.blas = FALSE,
    default.nproc.blas = NULL
  )
}

# NOTE(review): big_randomSVD is presumably bigstatsr's and attached elsewhere
# (e.g. by config.R) - confirm.
# `finally` restores the options even when the SVD call fails, so an error
# here does not leave the session with modified settings.
archs4_svdRes <- tryCatch(
  big_randomSVD(
    archs4_fbm_filt,
    k = SVD_K_archs4,
    ncores = N_CORES
  ),
  finally = if (parallel_svd) options(saved_opts)
)

In [7]:
# Remove non-finite singular values (NA/NaN/Inf) together with the matching
# columns of u and v, so that d, u and v stay aligned.
n_before <- length(archs4_svdRes$d)
valid_idx <- which(is.finite(archs4_svdRes$d))
n_dropped <- n_before - length(valid_idx)

archs4_svdRes$d <- archs4_svdRes$d[valid_idx]
# drop = FALSE keeps u and v as matrices even if a single component remains
archs4_svdRes$u <- archs4_svdRes$u[, valid_idx, drop = FALSE]
archs4_svdRes$v <- archs4_svdRes$v[, valid_idx, drop = FALSE]

# Make the filtering visible instead of silently shrinking the result.
if (n_dropped > 0) {
  message(
    "Removed ", n_dropped, " of ", n_before,
    " SVD components with non-finite singular values"
  )
}
if (length(valid_idx) == 0L) {
  warning("No finite singular values remain in archs4_svdRes", call. = FALSE)
}

In [8]:
# Persist the SVD result in the dataset folder.
svd_rds_path <- file.path(output_dir, "archs4_svdRes.rds")
saveRDS(archs4_svdRes, file = svd_rds_path)