Skip to content

Commit

Permalink
Re-introduce by-chromosome functionality and allow for .db SNP identi…
Browse files Browse the repository at this point in the history
…fier to be speicified
  • Loading branch information
shugamoe committed Oct 19, 2023
1 parent 47c0e3b commit 318a2f2
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 28 deletions.
7 changes: 5 additions & 2 deletions R/ctwas_impute_expr_z.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ impute_expr_z <- function (z_snp, weight, ld_pgenfs = NULL, ld_R_dir = NULL, met
strand_ambig_action_z = c("drop", "none", "recover"),
strand_ambig_action_wgt = c("drop", "none", "recover"),
ncore=1, chrom=1:22,
scale_by_ld_variance=T){
scale_by_ld_variance=T,
db_id="rsid"
){
dir.create(outputdir, showWarnings = F)

strand_ambig_action_z <- match.arg(strand_ambig_action_z)
Expand Down Expand Up @@ -125,7 +127,8 @@ impute_expr_z <- function (z_snp, weight, ld_pgenfs = NULL, ld_R_dir = NULL, met
ld_Rinfo=ld_Rinfo,
strand_ambig_action=strand_ambig_action_wgt,
ncore=ncore,
scale_by_ld_variance=scale_by_ld_variance)
scale_by_ld_variance=scale_by_ld_variance,
db_id=db_id)
} else {
stop("Unrecognized weight format, need to use either FUSION format or predict.db format")
}
Expand Down
22 changes: 12 additions & 10 deletions R/ctwas_process_regions.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ index_regions <- function(regionfile,
outname = NULL,
outputdir = getwd(),
ncore = 1,
reuse_R_gene = F) {
reuse_R_gene = F,
chrom = 1:22,
) {

if (is.null(pvarfs) & is.null(ld_Rfs)){
stop("Stopped: missing LD/genotype information.
Expand Down Expand Up @@ -82,7 +84,7 @@ index_regions <- function(regionfile,
}

regionlist <- list()
for (b in 1:length(exprvarfs)){
for (b in chrom){

if (!is.null(pvarfs)){
# get snp info (from pvarf file)
Expand Down Expand Up @@ -205,7 +207,7 @@ index_regions <- function(regionfile,

if ("z" %in% colnames(select)) {
# z score is given, trim snps with lower |z|
for (b in 1: length(regionlist)){
for (b in chrom){
for (rn in names(regionlist[[b]])) {
if (length(regionlist[[b]][[rn]][["sid"]]) > maxSNP){
idx <- match(regionlist[[b]][[rn]][["sid"]], select[, "id"])
Expand All @@ -218,7 +220,7 @@ index_regions <- function(regionfile,
}
} else{
# if no z score information, randomly select snps
for (b in 1: length(regionlist)){
for (b in chrom){
for (rn in names(regionlist[[b]])) {
if (length(regionlist[[b]][[rn]][["sid"]]) > maxSNP){
n.ori <- length(regionlist[[b]][[rn]][["sid"]])
Expand All @@ -237,14 +239,14 @@ index_regions <- function(regionfile,

dir.create(file.path(outputdir, paste0(outname, "_LDR")), showWarnings = F)

wgtall <- lapply(exprvarfs, function(x){load(paste0(strsplit(x, ".exprvar")[[1]], ".exprqc.Rd")); wgtlist})
wgtall <- lapply(exprvarfs[chrom], function(x){load(paste0(strsplit(x, ".exprvar")[[1]], ".exprqc.Rd")); wgtlist})
wgtlistall <- do.call(c, wgtall)
names(wgtlistall) <- do.call(c, lapply(wgtall, names))
rm(wgtall)

regionlist_all <- list()

for (b in 1:22){
for (b in chrom){
regionlist_all[[b]] <- cbind(b, names(regionlist[[b]]))
}

Expand Down Expand Up @@ -372,9 +374,9 @@ index_regions <- function(regionfile,


#' filter regions based on probability of at most 1 causal effect
filter_regions <- function(regionlist, group_prior, prob_single = 0.8, zdf){
filter_regions <- function(regionlist, group_prior, prob_single = 0.8, zdf, chrom=1:22){
regionlist2 <- regionlist
for (b in 1: length(regionlist)){
for (b in chrom){
for (rn in names(regionlist[[b]])) {
gid <- regionlist[[b]][[rn]][["gid"]]
sid <- regionlist[[b]][[rn]][["sid"]]
Expand All @@ -399,9 +401,9 @@ filter_regions <- function(regionlist, group_prior, prob_single = 0.8, zdf){
#' regions allocated to given number of cores
#' regionlist need to contain at least 1 non-empty
#'
region2core <- function(regionlist, ncore = 1){
region2core <- function(regionlist, ncore = 1, chrom=1:22){
dflist <- list()
for (b in 1:length(regionlist)){
for (b in chrom){
if (length(regionlist[[b]]) > 0){
dflist[[b]] <- data.frame("b" = b, "rn"= names(regionlist[[b]]), stringsAsFactors = FALSE)
}
Expand Down
8 changes: 5 additions & 3 deletions R/ctwas_read_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,9 @@ read_weight_predictdb <- function (weight,
ld_pgenfs=NULL,
ld_Rinfo=NULL,
scale_by_ld_variance=T,
ncore=1){
ncore=1,
db_id="rsid"
){

strand_ambig_action <- match.arg(strand_ambig_action)

Expand Down Expand Up @@ -431,11 +433,11 @@ read_weight_predictdb <- function (weight,
wgt <- query("select * from weights where gene = ?", params = list(gname))
wgt.matrix <- as.matrix(wgt[, "weight", drop = F])

rownames(wgt.matrix) <- wgt$rsid
rownames(wgt.matrix) <- wgt[[db_id]]
chrpos <- do.call(rbind, strsplit(wgt$varID, "_"))


snps <- data.frame(gsub("chr", "", chrpos[, 1]), wgt$rsid,
snps <- data.frame(gsub("chr", "", chrpos[, 1]), wgt[[db_id]],
"0", chrpos[, 2], wgt$eff_allele, wgt$ref_allele,
stringsAsFactors = F)
colnames(snps) <- c("chrom", "id", "cm", "pos", "alt", "ref")
Expand Down
37 changes: 26 additions & 11 deletions R/ctwas_rss.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,19 +150,27 @@ ctwas_rss <- function(
outname = NULL,
logfile = NULL,
merge = TRUE,
fine_map = T){
fine_map = T,
chrom=1:22
){

if (!is.null(logfile)){
addHandler(writeToFile, file= logfile, level='DEBUG')
}

loginfo('ctwas started ... ')

if (length(ld_exprfs) != 22){
stop("Not all imputed expression files for 22 chromosomes are provided.")
}
# if (length(ld_exprfs) != 22){
# stop("Not all imputed expression files for 22 chromosomes are provided.")
# }
ld_exprvarfs <- sapply(ld_exprfs, prep_exprvar)

if (!all(chrom == 1:22)){ # Try to have either all 22 or 1 chrom at a time
dummy_ld_exprvarfs <- rep(NA, 22)
dummy_ld_exprvarfs[chrom] <- ld_exprvarfs
ld_exprvarfs <- dummy_ld_exprvarfs
}

if (is.null(ld_pgenfs) & is.null(ld_R_dir)){
stop("Error: need to provide either .pgen file or ld_R file")
} else if (!is.null(ld_pgenfs)){
Expand Down Expand Up @@ -214,11 +222,12 @@ ctwas_rss <- function(
outname = outname,
outputdir = outputdir,
merge = merge,
chrom = chrom,
ncore = ncore_LDR) # susie_rss can't take 1 var.

saveRDS(regionlist, file=paste0(outputdir, "/", outname, ".regionlist.RDS"))

temp_regs <- lapply(1:22, function(x) cbind(x,
temp_regs <- lapply(chrom, function(x) cbind(x,
unlist(lapply(regionlist[[x]], "[[", "start")),
unlist(lapply(regionlist[[x]], "[[", "stop"))))

Expand Down Expand Up @@ -254,7 +263,8 @@ ctwas_rss <- function(
outputdir = outputdir,
outname = paste0(outname, ".s1"),
inv_gamma_shape=inv_gamma_shape,
inv_gamma_rate=inv_gamma_rate
inv_gamma_rate=inv_gamma_rate,
chrom = chrom
)

group_prior <- pars[["group_prior"]]
Expand All @@ -264,7 +274,8 @@ ctwas_rss <- function(
regionlist2 <- filter_regions(regionlist,
group_prior,
prob_single,
zdf)
zdf,
chrom = chrom)

loginfo("Blocks are filtered: %s blocks left",
sum(unlist(lapply(regionlist2, length))))
Expand All @@ -290,7 +301,8 @@ ctwas_rss <- function(
outputdir = outputdir,
outname = paste0(outname, ".s2"),
inv_gamma_shape=inv_gamma_shape,
inv_gamma_rate=inv_gamma_rate)
inv_gamma_rate=inv_gamma_rate,
chrom = chrom)

group_prior <- pars[["group_prior"]]
group_prior_var <- pars[["group_prior_var"]]
Expand Down Expand Up @@ -319,7 +331,8 @@ ctwas_rss <- function(
outname = paste0(outname, ".temp"),
inv_gamma_shape=inv_gamma_shape,
inv_gamma_rate=inv_gamma_rate,
report_parameters=F)
report_parameters=F,
chrom = chrom)

group_prior["SNP"] <- group_prior["SNP"] * thin # convert snp pi1

Expand All @@ -337,13 +350,14 @@ ctwas_rss <- function(
outname = outname, outputdir = outputdir,
merge = merge,
ncore = ncore_LDR,
chrom = chrom,
reuse_R_gene = T) # susie_rss can't take 1 var.

res <- data.table::fread(paste0(file.path(outputdir, outname), ".temp.susieIrss.txt"))

# filter out regions based on max gene PIP of the region
res.keep <- NULL
for (b in 1: length(regionlist)){
for (b in chrom){
for (rn in names(regionlist[[b]])){
gene_PIP <- max(res$susie_pip[res$type != "SNP" & res$region_tag1 == b & res$region_tag2 == rn], 0)
if (gene_PIP < rerun_gene_PIP) {
Expand Down Expand Up @@ -384,7 +398,8 @@ ctwas_rss <- function(
outname = paste0(outname, ".s3"),
inv_gamma_shape=inv_gamma_shape,
inv_gamma_rate=inv_gamma_rate,
report_parameters=F)
report_parameters=F,
chrom = chrom)

res.rerun <- data.table::fread(paste0(file.path(outputdir, outname), ".s3.susieIrss.txt"))

Expand Down
11 changes: 9 additions & 2 deletions R/ctwas_susieI_rss.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,21 @@ susieI_rss <- function(zdf,
outname = NULL,
inv_gamma_shape=1,
inv_gamma_rate=0,
report_parameters=T){
report_parameters=T,
chrom = 1:22){

outname <- file.path(outputdir, outname)

group_prior_var_structure <- match.arg(group_prior_var_structure)

ld_exprvarfs <- sapply(ld_exprfs, prep_exprvar)

if (!all(chrom == 1:22)){ # Try to have either all 22 or 1 chrom at a time
dummy_ld_exprvarfs <- rep(NA, 22)
dummy_ld_exprvarfs[chrom] <- ld_exprvarfs
ld_exprvarfs <- dummy_ld_exprvarfs
}

if (is.null(ld_pgenfs) & is.null(ld_Rfs)){
stop("Error: need to provide either .pgen file or ld_R file")
}
Expand Down Expand Up @@ -146,7 +153,7 @@ susieI_rss <- function(zdf,
cl <- parallel::makeCluster(ncore, outfile = "")
doParallel::registerDoParallel(cl)

corelist <- region2core(regionlist, ncore)
corelist <- region2core(regionlist, ncore, chrom=chrom)

outdf <- foreach (core = 1:length(corelist), .combine = "rbind",
.packages = "ctwas") %dopar% {
Expand Down

0 comments on commit 318a2f2

Please sign in to comment.