diff --git a/R/ctwas_impute_expr_z.R b/R/ctwas_impute_expr_z.R index 3c45c3c5..0624abe5 100644 --- a/R/ctwas_impute_expr_z.R +++ b/R/ctwas_impute_expr_z.R @@ -64,7 +64,9 @@ impute_expr_z <- function (z_snp, weight, ld_pgenfs = NULL, ld_R_dir = NULL, met strand_ambig_action_z = c("drop", "none", "recover"), strand_ambig_action_wgt = c("drop", "none", "recover"), ncore=1, chrom=1:22, - scale_by_ld_variance=T){ + scale_by_ld_variance=T, + db_id="rsid" + ){ dir.create(outputdir, showWarnings = F) strand_ambig_action_z <- match.arg(strand_ambig_action_z) @@ -125,7 +127,8 @@ impute_expr_z <- function (z_snp, weight, ld_pgenfs = NULL, ld_R_dir = NULL, met ld_Rinfo=ld_Rinfo, strand_ambig_action=strand_ambig_action_wgt, ncore=ncore, - scale_by_ld_variance=scale_by_ld_variance) + scale_by_ld_variance=scale_by_ld_variance, + db_id=db_id) } else { stop("Unrecognized weight format, need to use either FUSION format or predict.db format") } diff --git a/R/ctwas_process_regions.R b/R/ctwas_process_regions.R index 287cfcfe..eb6d1cb9 100644 --- a/R/ctwas_process_regions.R +++ b/R/ctwas_process_regions.R @@ -53,7 +53,9 @@ index_regions <- function(regionfile, outname = NULL, outputdir = getwd(), ncore = 1, - reuse_R_gene = F) { + reuse_R_gene = F, + chrom = 1:22, + ) { if (is.null(pvarfs) & is.null(ld_Rfs)){ stop("Stopped: missing LD/genotype information. @@ -82,7 +84,7 @@ index_regions <- function(regionfile, } regionlist <- list() - for (b in 1:length(exprvarfs)){ + for (b in chrom){ if (!is.null(pvarfs)){ # get snp info (from pvarf file) @@ -205,7 +207,7 @@ index_regions <- function(regionfile, if ("z" %in% colnames(select)) { # z score is given, trim snps with lower |z| - for (b in 1: length(regionlist)){ + for (b in chrom){ for (rn in names(regionlist[[b]])) { if (length(regionlist[[b]][[rn]][["sid"]]) > maxSNP){ idx <- match(regionlist[[b]][[rn]][["sid"]], select[, "id"]) @@ -218,7 +220,7 @@ index_regions <- function(regionfile, } } else{ # if no z score information, randomly select snps - for (b in 1: length(regionlist)){ + for (b in chrom){ for (rn in names(regionlist[[b]])) { if (length(regionlist[[b]][[rn]][["sid"]]) > maxSNP){ n.ori <- length(regionlist[[b]][[rn]][["sid"]]) @@ -237,14 +239,14 @@ index_regions <- function(regionfile, dir.create(file.path(outputdir, paste0(outname, "_LDR")), showWarnings = F) - wgtall <- lapply(exprvarfs, function(x){load(paste0(strsplit(x, ".exprvar")[[1]], ".exprqc.Rd")); wgtlist}) + wgtall <- lapply(exprvarfs[chrom], function(x){load(paste0(strsplit(x, ".exprvar")[[1]], ".exprqc.Rd")); wgtlist}) wgtlistall <- do.call(c, wgtall) names(wgtlistall) <- do.call(c, lapply(wgtall, names)) rm(wgtall) regionlist_all <- list() - for (b in 1:22){ + for (b in chrom){ regionlist_all[[b]] <- cbind(b, names(regionlist[[b]])) } @@ -372,9 +374,9 @@ index_regions <- function(regionfile, #' filter regions based on probability of at most 1 causal effect -filter_regions <- function(regionlist, group_prior, prob_single = 0.8, zdf){ +filter_regions <- function(regionlist, group_prior, prob_single = 0.8, zdf, chrom=1:22){ regionlist2 <- regionlist - for (b in 1: length(regionlist)){ + for (b in chrom){ for (rn in names(regionlist[[b]])) { gid <- regionlist[[b]][[rn]][["gid"]] sid <- regionlist[[b]][[rn]][["sid"]] @@ -399,9 +401,9 @@ filter_regions <- function(regionlist, group_prior, prob_single = 0.8, zdf){ #' regions allocated to given number of cores #' regionlist need to contain at least 1 non-empty #' -region2core <- function(regionlist, ncore = 1){ +region2core <- function(regionlist, ncore = 1, chrom=1:22){ dflist <- list() - for (b in 1:length(regionlist)){ + for (b in chrom){ if (length(regionlist[[b]]) > 0){ dflist[[b]] <- data.frame("b" = b, "rn"= names(regionlist[[b]]), stringsAsFactors = FALSE) } diff --git a/R/ctwas_read_data.R b/R/ctwas_read_data.R index 21f0c14f..5978604b 100644 --- a/R/ctwas_read_data.R +++ b/R/ctwas_read_data.R @@ -357,7 +357,9 @@ read_weight_predictdb <- function (weight, ld_pgenfs=NULL, ld_Rinfo=NULL, scale_by_ld_variance=T, - ncore=1){ + ncore=1, + db_id="rsid" + ){ strand_ambig_action <- match.arg(strand_ambig_action) @@ -431,11 +433,11 @@ read_weight_predictdb <- function (weight, wgt <- query("select * from weights where gene = ?", params = list(gname)) wgt.matrix <- as.matrix(wgt[, "weight", drop = F]) - rownames(wgt.matrix) <- wgt$rsid + rownames(wgt.matrix) <- wgt[[db_id]] chrpos <- do.call(rbind, strsplit(wgt$varID, "_")) - snps <- data.frame(gsub("chr", "", chrpos[, 1]), wgt$rsid, + snps <- data.frame(gsub("chr", "", chrpos[, 1]), wgt[[db_id]], "0", chrpos[, 2], wgt$eff_allele, wgt$ref_allele, stringsAsFactors = F) colnames(snps) <- c("chrom", "id", "cm", "pos", "alt", "ref") diff --git a/R/ctwas_rss.R b/R/ctwas_rss.R index e40d7527..4b959667 100644 --- a/R/ctwas_rss.R +++ b/R/ctwas_rss.R @@ -150,7 +150,9 @@ ctwas_rss <- function( outname = NULL, logfile = NULL, merge = TRUE, - fine_map = T){ + fine_map = T, + chrom=1:22 + ){ if (!is.null(logfile)){ addHandler(writeToFile, file= logfile, level='DEBUG') @@ -158,11 +160,17 @@ ctwas_rss <- function( loginfo('ctwas started ... ') - if (length(ld_exprfs) != 22){ - stop("Not all imputed expression files for 22 chromosomes are provided.") - } + # if (length(ld_exprfs) != 22){ + # stop("Not all imputed expression files for 22 chromosomes are provided.") + # } ld_exprvarfs <- sapply(ld_exprfs, prep_exprvar) + if (!all(chrom == 1:22)){ # Try to have either all 22 or 1 chrom at a time + dummy_ld_exprvarfs <- rep(NA, 22) + dummy_ld_exprvarfs[chrom] <- ld_exprvarfs + ld_exprvarfs <- dummy_ld_exprvarfs + } + if (is.null(ld_pgenfs) & is.null(ld_R_dir)){ stop("Error: need to provide either .pgen file or ld_R file") } else if (!is.null(ld_pgenfs)){ @@ -214,11 +222,12 @@ ctwas_rss <- function( outname = outname, outputdir = outputdir, merge = merge, + chrom = chrom, ncore = ncore_LDR) # susie_rss can't take 1 var. saveRDS(regionlist, file=paste0(outputdir, "/", outname, ".regionlist.RDS")) - temp_regs <- lapply(1:22, function(x) cbind(x, + temp_regs <- lapply(chrom, function(x) cbind(x, unlist(lapply(regionlist[[x]], "[[", "start")), unlist(lapply(regionlist[[x]], "[[", "stop")))) @@ -254,7 +263,8 @@ ctwas_rss <- function( outputdir = outputdir, outname = paste0(outname, ".s1"), inv_gamma_shape=inv_gamma_shape, - inv_gamma_rate=inv_gamma_rate + inv_gamma_rate=inv_gamma_rate, + chrom = chrom ) group_prior <- pars[["group_prior"]] @@ -264,7 +274,8 @@ ctwas_rss <- function( regionlist2 <- filter_regions(regionlist, group_prior, prob_single, - zdf) + zdf, + chrom = chrom) loginfo("Blocks are filtered: %s blocks left", sum(unlist(lapply(regionlist2, length)))) @@ -290,7 +301,8 @@ ctwas_rss <- function( outputdir = outputdir, outname = paste0(outname, ".s2"), inv_gamma_shape=inv_gamma_shape, - inv_gamma_rate=inv_gamma_rate) + inv_gamma_rate=inv_gamma_rate, + chrom = chrom) group_prior <- pars[["group_prior"]] group_prior_var <- pars[["group_prior_var"]] @@ -319,7 +331,8 @@ ctwas_rss <- function( outname = paste0(outname, ".temp"), inv_gamma_shape=inv_gamma_shape, inv_gamma_rate=inv_gamma_rate, - report_parameters=F) + report_parameters=F, + chrom = chrom) group_prior["SNP"] <- group_prior["SNP"] * thin # convert snp pi1 @@ -337,13 +350,14 @@ ctwas_rss <- function( outname = outname, outputdir = outputdir, merge = merge, ncore = ncore_LDR, + chrom = chrom, reuse_R_gene = T) # susie_rss can't take 1 var. res <- data.table::fread(paste0(file.path(outputdir, outname), ".temp.susieIrss.txt")) # filter out regions based on max gene PIP of the region res.keep <- NULL - for (b in 1: length(regionlist)){ + for (b in chrom){ for (rn in names(regionlist[[b]])){ gene_PIP <- max(res$susie_pip[res$type != "SNP" & res$region_tag1 == b & res$region_tag2 == rn], 0) if (gene_PIP < rerun_gene_PIP) { @@ -384,7 +398,8 @@ ctwas_rss <- function( outname = paste0(outname, ".s3"), inv_gamma_shape=inv_gamma_shape, inv_gamma_rate=inv_gamma_rate, - report_parameters=F) + report_parameters=F, + chrom = chrom) res.rerun <- data.table::fread(paste0(file.path(outputdir, outname), ".s3.susieIrss.txt")) diff --git a/R/ctwas_susieI_rss.R b/R/ctwas_susieI_rss.R index 040b8f39..1ca639fe 100644 --- a/R/ctwas_susieI_rss.R +++ b/R/ctwas_susieI_rss.R @@ -94,7 +94,8 @@ susieI_rss <- function(zdf, outname = NULL, inv_gamma_shape=1, inv_gamma_rate=0, - report_parameters=T){ + report_parameters=T, + chrom = 1:22){ outname <- file.path(outputdir, outname) @@ -102,6 +103,12 @@ susieI_rss <- function(zdf, ld_exprvarfs <- sapply(ld_exprfs, prep_exprvar) + if (!all(chrom == 1:22)){ # Try to have either all 22 or 1 chrom at a time + dummy_ld_exprvarfs <- rep(NA, 22) + dummy_ld_exprvarfs[chrom] <- ld_exprvarfs + ld_exprvarfs <- dummy_ld_exprvarfs + } + if (is.null(ld_pgenfs) & is.null(ld_Rfs)){ stop("Error: need to provide either .pgen file or ld_R file") } @@ -146,7 +153,7 @@ susieI_rss <- function(zdf, cl <- parallel::makeCluster(ncore, outfile = "") doParallel::registerDoParallel(cl) - corelist <- region2core(regionlist, ncore) + corelist <- region2core(regionlist, ncore, chrom=chrom) outdf <- foreach (core = 1:length(corelist), .combine = "rbind", .packages = "ctwas") %dopar% {