-
Notifications
You must be signed in to change notification settings - Fork 29
/
get_example_gwas.R
108 lines (107 loc) · 4.08 KB
/
get_example_gwas.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#' Import GWAS summary statistics
#'
#' Write example GWAS summary statistics to disk.
#' All examples originally come from the
#' \href{https://www.ukbiobank.ac.uk/}{UK Biobank}.
#' To reduce file size, SNPs have been filtered to Minor Allele Frequency (MAF)
#' > 5% and a nominal p-value < 0.05. However, in practice we
#' recommend using full GWAS summary statistics
#' (after applying \link[MungeSumstats]{format_sumstats}).
#'
#' @param trait Which trait to get GWAS summary stats for.
#' @param munged If \code{TRUE} (\emph{default}), return the pre-munged
#' version of the GWAS; if \code{FALSE}, download the raw version.
#' @param storage_dir Folder in which to store the GWAS summary stats.
#' @param verbose Print messages.
#' @inheritParams get_example_gwas_raw
#'
#' @source
#' \code{
#' #### fluid_intelligence ####
#' gwas_sumstats_path <- MAGMA.Celltyping::get_example_gwas(
#' trait = "fluid_intelligence", munged = FALSE)
#' path_formatted <- MungeSumstats::format_sumstats(
#' path=gwas_sumstats_path,
#' save_path = tempfile(fileext = ".formatted.tsv.gz"),
#' ref_genome ="GRCh37")
#' ss <- data.table::fread(path_formatted)
#' ss2 <- ss[MINOR_AF>=.05 & P<.05,]
#' data.table::fwrite(ss2, path_formatted, sep = "\t")
#' piggyback::pb_upload(file = path_formatted,
#' name = "fluid_intelligence.ukb.tsv.gz",
#' repo = "neurogenomics/MAGMA_Celltyping",
#' overwrite = TRUE)
#'
#' #### prospective_memory ####
#' gwas_sumstats_path <- MAGMA.Celltyping::get_example_gwas(
#' trait = "prospective_memory", munged = FALSE)
#' path_formatted <- MungeSumstats::format_sumstats(
#' path=gwas_sumstats_path,
#' save_path = tempfile(fileext = ".formatted.tsv.gz"),
#' ref_genome ="GRCh37")
#' ss <- data.table::fread(path_formatted)
#' ss2 <- ss[MINOR_AF>=.05 & P<.05,]
#' data.table::fwrite(ss2, path_formatted, sep = "\t")
#' piggyback::pb_upload(file = path_formatted,
#' name = "prospective_memory.ukb.tsv.gz",
#' repo = "neurogenomics/MAGMA_Celltyping",
#' overwrite = TRUE)
#' }
#' @return Path to downloaded GWAS summary statistics.
#'
#' @examples
#' path_formatted <- MAGMA.Celltyping::get_example_gwas()
#' @export
#' @importFrom data.table fwrite
#' @importFrom tools R_user_dir
get_example_gwas <- function(trait = c(
                               "educational_attainment",
                               "fluid_intelligence",
                               "prospective_memory"
                             ),
                             munged = TRUE,
                             storage_dir = tempdir(),
                             timeout = 60 * 5,
                             verbose = TRUE) {
  # Only the first supplied trait is used, and it is compared
  # case-insensitively (the default vector therefore resolves to
  # "educational_attainment").
  trait <- tolower(trait)[1]
  if (munged) {
    # Traits for which a pre-munged file is available.
    munged_traits <- c(
      "fluid_intelligence",
      "prospective_memory",
      "educational_attainment"
    )
    if (!trait %in% munged_traits) {
      # Build the message from adjacent pieces so that it contains no
      # embedded newline or source indentation.
      stop(
        "trait must be one of: 'prospective_memory', ",
        "'fluid_intelligence', or 'educational_attainment'"
      )
    }
    messager("Importing munged GWAS summary statistics:", trait,
      v = verbose
    )
    if (trait == "educational_attainment") {
      # Make sure the target folder exists before writing into it;
      # a missing directory would otherwise make the write fail.
      dir.create(storage_dir, showWarnings = FALSE, recursive = TRUE)
      unzipped_path <- file.path(
        storage_dir,
        "educational_attainment.tsv"
      )
      ## gwas_munged is a built-in dataset
      data.table::fwrite(
        x = MAGMA.Celltyping::gwas_munged,
        file = unzipped_path,
        sep = "\t"
      )
    } else {
      # Fetch the compressed "<trait>.ukb.tsv.gz" file, then decompress it.
      path <- get_data(
        fname = paste0(trait, ".ukb.tsv.gz"),
        storage_dir = storage_dir
      )
      unzipped_path <- decompress(
        path_formatted = path,
        storage_dir = storage_dir,
        verbose = verbose
      )
    }
  } else {
    # Raw (un-munged) summary statistics: delegate to the raw importer,
    # which performs its own handling of `trait`.
    unzipped_path <- get_example_gwas_raw(
      storage_dir = storage_dir,
      trait = trait,
      timeout = timeout,
      verbose = verbose
    )
  }
  unzipped_path
}