# ARCHS4 model building with CLAMP

💡 **Environment:** `clamp-analyses`  

This notebook runs `CLAMPbase` on the ARCHS4 data, starting from the SVD results and filtered matrix already stored in the dataset folder.

## Load libraries

In [4]:
# Install CLAMP from GitHub only when it is not already available locally.
# `requireNamespace()` checks availability without attaching the package.
if (isFALSE(requireNamespace("CLAMP", quietly = TRUE))) {
  devtools::install_github("wgmao/CLAMP")
}

library(bigstatsr)
library(data.table)
library(dplyr)
library(rsvd)
library(glmnet)
library(Matrix)
library(knitr)
library(here)
library(CLAMP)
library(CLAMP)

# Run the project configuration script, located relative to the project root
# via `here()`; the `config` object used below is presumably defined there.
source(file = here("config.R"))

# Seed the RNG from the configured value.
set.seed(seed = config$ARCHS4$RANDOM_SVD_SEED)

## Output directory

In [5]:
# Folder that this notebook reads its inputs from and writes its results to.
output_dir <- config$ARCHS4$DATASET_FOLDER
# Create it together with any missing parents; stay quiet if it already exists.
dir.create(path = output_dir, recursive = TRUE, showWarnings = FALSE)

In [6]:
# Load the stored SVD results; their `d` component feeds the K estimate below.
archs4_svdRes <- readRDS(file = file.path(output_dir, "archs4_svdRes.rds"))

In [7]:
# Load the filtered ARCHS4 matrix object that is passed to CLAMPbase as `Y`.
archs4_fbm_filt <- readRDS(
  file = file.path(output_dir, "archs4_fbm_filt.rds")
)

## Estimate K for CLAMP

In [8]:
# Derive K from the singular values: `num.pc()` is given a list holding only
# the `d` component of the SVD, and its result is doubled.
# NOTE(review): `num.pc()` presumably estimates the number of significant
# principal components -- confirm against the CLAMP documentation.
CLAMP_K_archs4 <- 2 * num.pc(list(d = archs4_svdRes$d))
message(paste0("Inferred CLAMP K = ", CLAMP_K_archs4))

Inferred CLAMP K = 162



In [9]:
# Persist the chosen K next to the other dataset artifacts.
saveRDS(
  object = CLAMP_K_archs4,
  file = file.path(output_dir, "CLAMP_K_archs4.rds")
)

## CLAMPbase initialization

In [10]:
# Fit CLAMPbase on the filtered ARCHS4 matrix, handing it the stored SVD
# results and the K value computed above. `trace = TRUE` makes the call report
# its settings and per-iteration progress.
archs4_baseRes <- CLAMPbase(
  Y = archs4_fbm_filt,
  svdres = archs4_svdRes,
  clamp_k = CLAMP_K_archs4,
  trace = TRUE
)

****

CLAMP k is set to 162

L1 is set to 19.2919187521306

L2 is set to 57.8757562563918

Progress 1 / 200 | Bdiff=0.316089, minCor=0.833830

Progress 2 / 200 | Bdiff=0.020564, minCor=0.962591

Progress 3 / 200 | Bdiff=0.011396, minCor=0.977393

Progress 4 / 200 | Bdiff=0.008620, minCor=0.987385

Progress 5 / 200 | Bdiff=0.006959, minCor=0.989738

Progress 6 / 200 | Bdiff=0.005882, minCor=0.989893

Progress 7 / 200 | Bdiff=0.005158, minCor=0.990643

Progress 8 / 200 | Bdiff=0.004646, minCor=0.992001

Progress 9 / 200 | Bdiff=0.004271, minCor=0.993111

Progress 10 / 200 | Bdiff=0.003971, minCor=0.993509

Progress 11 / 200 | Bdiff=0.003710, minCor=0.994166

Progress 12 / 200 | Bdiff=0.003462, minCor=0.994587

Progress 13 / 200 | Bdiff=0.003208, minCor=0.994777

Progress 14 / 200 | Bdiff=0.002936, minCor=0.995376

Progress 15 / 200 | Bdiff=0.002652, minCor=0.995824

Progress 16 / 200 | Bdiff=0.002366, minCor=0.996224

Progress 17 / 200 | Bdiff=0.002100, minCor=0.996345

Progress 18 / 200

In [12]:
# Identifier vectors used afterwards to label the rows of Z (genes) and the
# columns of B (samples).
sample_names <- readRDS(file = file.path(output_dir, "archs4_samples.rds"))
archs4_genes <- readRDS(file = file.path(output_dir, "archs4_genes.rds"))

In [13]:
# Convert the model matrices to data frames and label them: Z gets one gene
# identifier per row, B gets one sample identifier per column.

# Guard against mislabelled output. `colnames<-` on a data frame pads a
# too-short name vector with NA instead of failing, so check that both
# identifier vectors match the model dimensions before assigning them.
if (length(archs4_genes) != nrow(archs4_baseRes$Z)) {
  stop(
    "archs4_genes has ", length(archs4_genes), " entries but Z has ",
    nrow(archs4_baseRes$Z), " rows",
    call. = FALSE
  )
}
if (length(sample_names) != ncol(archs4_baseRes$B)) {
  stop(
    "sample_names has ", length(sample_names), " entries but B has ",
    ncol(archs4_baseRes$B), " columns",
    call. = FALSE
  )
}

archs4_baseRes$Z <- data.frame(archs4_baseRes$Z)
rownames(archs4_baseRes$Z) <- archs4_genes
head(archs4_baseRes$Z)

archs4_baseRes$B <- data.frame(archs4_baseRes$B)
colnames(archs4_baseRes$B) <- sample_names
head(archs4_baseRes$B)

Unnamed: 0_level_0,LV1,LV2,LV3,LV4,LV5,LV6,LV7,LV8,LV9,LV10,⋯,LV153,LV154,LV155,LV156,LV157,LV158,LV159,LV160,LV161,LV162
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
A1BG,0,0,0,0.0,0,0.144713,0,0,0,0.1686867,⋯,0.0,0,0.0,0.0,0,0.0,0.0,0,0,0.0
A1BG-AS1,0,0,0,0.0,0,0.275832,0,0,0,0.0,⋯,0.0,0,0.0,0.2450749,0,0.0,0.0,0,0,0.0
A2M,0,0,0,0.2633904,0,0.0,0,0,0,0.0,⋯,0.22824428,0,0.0,0.3939044,0,0.2862303,0.221922,0,0,0.2620452
A4GALT,0,0,0,0.320719,0,0.3502162,0,0,0,0.0,⋯,0.03349063,0,0.0,0.0,0,0.0,0.0,0,0,0.0
AAAS,0,0,0,0.0,0,0.2541471,0,0,0,0.0,⋯,0.0,0,0.0,0.0,0,0.0,0.0,0,0,0.0
AACS,0,0,0,0.0,0,0.2842216,0,0,0,0.0,⋯,0.0,0,0.1614475,0.0,0,0.0,0.0,0,0,0.0


Unnamed: 0_level_0,GSM4372617,GSM5491449,GSM3127398,GSM5957474,GSM3573906,GSM5633169,GSM4372072,GSM5358182,GSM4660685,GSM6656371,⋯,GSM6244357,GSM5101191,GSM2800621,GSM3612465,GSM8359157,GSM4649264,GSM5402786,GSM5174325,GSM5344761,GSM7763940
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
LV1,0.60174815,0.5828152,0.6667292,0.74088493,0.71537696,-0.882825639,-0.829692432,-0.80786592,-0.94544721,-0.890690293,⋯,1.49558295,1.3906324,1.35763315,1.3455644,1.4600245,1.323687,1.2205794,1.3120976,1.3791469,1.3080723
LV2,-0.18888891,-0.1895657,-0.1883268,-0.21914764,-0.21309157,-0.015814767,-0.008047739,-0.02079689,0.002705015,-0.01017252,⋯,-0.38986731,-0.4091447,-0.41309245,-0.4259002,-0.3492248,-0.35371,-0.3752342,-0.45865856,-0.3841234,-0.3757153
LV3,-0.4403475,-0.3974148,-0.42973011,-0.43874649,-0.39337099,-0.077757348,-0.11670183,-0.03483892,-0.093002473,-0.083132408,⋯,-0.45104694,-0.6058479,-0.57126376,-0.5327424,-0.6332632,-0.6373943,-0.653115,-0.50509813,-0.554573,-0.5727197
LV4,-0.16209973,-0.1675097,-0.16120418,-0.1405791,-0.14459061,-0.143572075,-0.171722087,-0.18668756,-0.171375737,-0.166671714,⋯,-0.23750931,-0.224385,-0.22242863,-0.2396331,-0.2079097,-0.2200711,-0.2305503,-0.22681622,-0.2513884,-0.2565617
LV5,-0.04497595,-0.059537,-0.04794079,-0.09203017,-0.08747657,-0.002968895,0.040270968,-0.03002153,0.008246652,0.009815058,⋯,0.05105698,0.3718388,0.07549137,0.2415645,0.240018,0.4229554,0.4113415,-0.01299845,0.3263619,0.2866605
LV6,1.37700181,1.4199371,1.28551128,1.35450654,1.26817242,-1.205955878,-1.194087467,-1.20584638,-1.162918595,-1.202295644,⋯,0.26256508,0.8333812,1.08757958,1.1505072,0.4856849,1.1795087,1.2691279,1.45842068,1.4167504,1.3437901


In [14]:
# Save the fitted model object to the dataset folder.
saveRDS(
  object = archs4_baseRes,
  file = file.path(output_dir, "archs4_baseRes.rds")
)

In [15]:
# Export the B and Z components of the model as CSV files in a dedicated
# "CLAMPbase" subfolder of the dataset folder.
model_dir <- file.path(output_dir, "CLAMPbase")
dir.create(path = model_dir, recursive = TRUE, showWarnings = FALSE)

B <- archs4_baseRes$B
Z <- archs4_baseRes$Z

write.csv(x = B, file = file.path(model_dir, "B.csv"))
write.csv(x = Z, file = file.path(model_dir, "Z.csv"))