In [1]:
# install.packages('languageserver', dependencies = TRUE)
# install.packages('devtools', dependencies = TRUE)
# devtools::install_github('IRkernel/IRkernel',dependencies = TRUE)
# IRkernel::installspec()
# devtools::install_github("mrcieu/gwasglue")
# devtools::install_github("jrs95/gassocplot", dependencies = TRUE, upgrade = "always")

In [2]:
suppressPackageStartupMessages(suppressWarnings({
    library(gwasglue)
    library(dplyr)
    library(gassocplot)
    library(coloc)
    library(ggplot2)
    library(foreach)
    library(doParallel)
}))
# To identify the maximum number of cores available on this machine:
# ncpus <- detectCores()

# Register the doParallel backend with 4 workers; `%dopar%` loops run on the
# backend registered here.
# Timings observed when writing the GWAS hits to file:
#   2 cores took ~6 min
#   4 cores took ~3 min
registerDoParallel(4)


API: public: http://gwas-api.mrcieu.ac.uk/



In [3]:
# GWAS under study: white blood cell (wbc) count, which is related to
# autoimmune disease.
gwas_dataset <- "ieu-b-30"
# Second dataset id; it is only passed along as a placeholder where a pair of
# datasets is required, and its results are discarded.
dummy_dataset <- "ieu-a-7"

# Display the metadata record of the GWAS under study.
gwasinfo(id = gwas_dataset)
# gwasinfo(id = dummy_dataset)

id,trait,ontology,unit,sample_size,author,build,mr,category,subcategory,group_name,year,priority,population,consortium,sex,note
<chr>,<chr>,<lgl>,<lgl>,<int>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>
ieu-b-30,white blood cell count,,,563946,"Vuckovic, D",HG19/GRCh37,1,Continuous,Immune system,public,2020,0,European,Blood Cell Consortium,Males and Females,


In [4]:
# Identify the top GWAS hits for the dataset selected in `gwas_dataset` and
# sort them by p-value (lowest to highest), so that a row's index is also its
# p-value rank. The id is taken from `gwas_dataset` rather than repeated as a
# literal, so the hits always belong to the dataset used in the rest of the
# notebook.
top <- ieugwasr::tophits(gwas_dataset) %>% arrange(p)
# str(top)

In [10]:
# Half-width, in base pairs, of the region extracted around each top hit.
window_size <- 50000
# Directory that receives one region file per top hit.
if (!dir.exists("gwas_hits")) {
    dir.create("gwas_hits")
}

# Parallel loop over the top GWAS hits. `top` is sorted by ascending p-value,
# so the row index `i` is also the p-value rank of the hit.
#  - seq_len() yields an empty sequence when `top` has no rows, whereas
#    1:nrow(top) would iterate over 1 and 0.
#  - .packages attaches gwasglue on workers that do not inherit the packages
#    attached in this session (socket-based clusters).
#  - invisible() keeps the list of per-iteration NULL results from printing.
invisible(foreach(i = seq_len(nrow(top)), .packages = "gwasglue") %dopar% {
    hit <- top[i, ]
    # add leading 0s to pval_rank_gwashit so that it always has 3 digits
    pval_rank_gwashit <- sprintf("%03d", i)
    chr_gwashit <- hit$chr
    pos_gwashit <- hit$position

    # Region bounds around the hit. They are coerced to integer because a
    # double such as 100000 is pasted as "1e+05", which would corrupt both the
    # region string and the file name. The lower bound is clamped to 1 so that
    # a hit close to the start of a chromosome cannot yield a coordinate below 1.
    lower <- as.integer(max(1, pos_gwashit - window_size))
    upper <- as.integer(pos_gwashit + window_size)
    # Region in "<chr>:<start>-<end>" form
    chrpos <- paste0(chr_gwashit, ":", lower, "-", upper)

    # Extract the SNPs in the region of the top hit. The call requires two
    # dataset ids; the second one is only a placeholder and its result is
    # dropped below, so its type is left at the default.
    out <- ieugwasr_to_coloc(
        id1 = as.character(gwas_dataset),
        id2 = as.character(dummy_dataset),
        chrompos = as.character(chrpos),
        type1 = "quant"
    )

    # get rid of dummy dataset
    out <- out[1]

    # export to file (tab-delimited, no header, no row names) in directory
    # named gwas_hits in the following format:
    # <pval rank>_<chr>_<lower bound>_<upper bound>_gwas.tsv
    write.table(out,
        file = paste0(
            "gwas_hits/", pval_rank_gwashit, "_", chr_gwashit, "_", lower, "_", upper, "_gwas.tsv"
        ),
        sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
    )
})

In [None]:
# python3 eqtl_build_db.py
# python3 eqtl_query_db.py

In [None]:
# Get the list of .tsv files in the "output" directory (list.files() returns
# them in alphabetical order). The pattern is escaped and anchored so that
# only names ending in ".tsv" match.
files <- list.files(path = "output", pattern = "\\.tsv$", full.names = TRUE)

# truncate filelist to first pair of files
# files <- files[1:4]
# files

# The loop consumes the files two at a time, so an odd count means at least
# one pair is incomplete. Fail before anything is appended to the log.
if (length(files) %% 2 != 0) {
    stop(
        "Expected an even number of .tsv files in 'output', found ",
        length(files),
        call. = FALSE
    )
}

# make a directory named output_coloc if it doesn't exist (once, not per pair)
if (!dir.exists("output_coloc")) {
    dir.create("output_coloc")
}
coloc_log <- paste0("output_coloc/", "PP.H1-4.abf.tsv")

# Index of the first file of each pair: 1, 3, 5, ... This is empty when there
# are no files, where seq(1, 0, 2) would raise an error.
pair_starts <- seq_len(length(files) %/% 2) * 2 - 1

# This may need to be modified so it's not just taking file names in alphabetical order, but in descending pval order.
# NOTE(review): the pairing relies on the eQTL file of a locus sorting directly
# before its GWAS file - confirm against the naming of the files in "output".
for (i in pair_starts) {
    print(paste("starting",i,files[i],files[i+1]))

    # read in both files of the pair as lists of columns
    eqtl <- as.list(read.table(file=files[i], header = TRUE, sep = '\t'))
    gwas <- as.list(read.table(file=files[i+1], header = TRUE, sep = '\t'))

    # run coloc on the pair
    res <- coloc::coloc.abf(gwas, eqtl)

    # Locus name: file name of the first file of the pair without the
    # "_eQTLs.<extension>" suffix and without the "region_" prefix
    SNP_locus_name <- basename(files[i])
    SNP_locus_name <- sub("_eQTLs\\..*$", "", SNP_locus_name)
    SNP_locus_name <- sub("^region_", "", SNP_locus_name)

    # Elements PP.H1.abf to PP.H4.abf of the coloc summary
    posteriors <- as.numeric(
        res$summary[c("PP.H1.abf", "PP.H2.abf", "PP.H3.abf", "PP.H4.abf")]
    )

    # Append one line per locus to the log file. The fields are joined with
    # tabs explicitly: paste() separates with spaces by default, and the `sep`
    # of write.table() has no effect on a single pre-built string.
    log_line <- paste(c(SNP_locus_name, as.character(posteriors)), collapse = "\t")
    write.table(log_line, file = coloc_log, sep = "\t", quote=FALSE, row.names = FALSE, col.names = FALSE, append = TRUE)

    # print(paste("eQTL path: ", files[i]))
    # print(paste("GWAS path: ", files[i+1]))
    # create a new list where eqtl and gwas are its elements; it is overwritten
    # on every iteration, so after the loop it holds the last pair only
    out <- list(eqtl, gwas)

    print(paste("ending",i,files[i],files[i+1]))
}

In [None]:
# top <- ieugwasr::tophits('ieu-a-300') %>% arrange(p)
# top 
# chrpos <- paste0(top$chr[1], ":", top$position[1] - 90000, "-", top$position[1] + 90000)
# out <- ieugwasr_to_coloc(id1='ieu-a-300', id2='ieu-a-7', chrompos=chrpos)
# res <- coloc::coloc.abf(out[[1]], out[[2]])
# temp <- coloc_to_gassocplot(out)
# gassocplot::stack_assoc_plot(temp$markers, temp$z, temp$corr, traits=temp$traits)

# download.file("https://gwas.mrcieu.ac.uk/files/ieu-a-300/ieu-a-300.vcf.gz","ieu-a-300.vcf.gz")
# download.file("https://gwas.mrcieu.ac.uk/files/ieu-a-300/ieu-a-300.vcf.gz.tbi","ieu-a-300.vcf.gz.tbi")
# download.file("https://gwas.mrcieu.ac.uk/files/ieu-a-7/ieu-a-7.vcf.gz","ieu-a-7.vcf.gz")
# download.file("https://gwas.mrcieu.ac.uk/files/ieu-a-7/ieu-a-7.vcf.gz.tbi","ieu-a-7.vcf.gz.tbi")

# chrpos <- "19:11112306-11292306"
# vout <- gwasvcf_to_coloc("ieu-a-300.vcf.gz", "ieu-a-7.vcf.gz", chrpos)
# vres <- coloc::coloc.abf(vout[[1]], vout[[2]])

# library(gassocplot)
# temp <- coloc_to_gassocplot(vout)
# gassocplot::stack_assoc_plot(temp$markers, temp$z, temp$corr, traits=temp$traits)