-
Notifications
You must be signed in to change notification settings - Fork 4
/
prs_glm.R
80 lines (69 loc) · 2.54 KB
/
prs_glm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
## ========================================================================== ##
## PRS: Run logistic regression models to estimate effect of PRS on AD
## ========================================================================== ##
# Load packages and functions
library(tidyverse)
library(broom)
zscore = function(x){(x - mean(x)) / sd(x)}
# setwd('/Users/sheaandrews/LOAD_minerva/dummy/shea/Projects/AD_PRS')
## File Paths
prs.file.path <- snakemake@input[["prs"]]
sum.file.path <- snakemake@input[["summary"]]
cohort.pheno.path <- snakemake@input[["pheno_file"]]
## params
exposure = snakemake@params[["pheno"]]
r2 <- snakemake@params[["r2"]]
window <- snakemake@params[["window"]]
# Read in summary file and extract best Pt
message('READ IN SUMMARY FILE \n')
best.prs <- read_tsv(sum.file.path) %>%
pull(Threshold) %>%
as.character()
# Read in Pheno file
message('READ IN PHENOTYPES \n')
pheno <- read_tsv(cohort.pheno.path, col_names = T)
# Read in all PRS file
message('READ IN PRS \n')
prs <- read_csv(prs.file.path) %>%
# rename_at(vars(-FID, -IID), function(x) paste0('prs.pt_', x)) %>%
mutate_at(vars(matches("prs.pt_")), zscore) %>%
select(-prs.pc2)
# extract Pt used
pt <- select(prs, starts_with("prs")) %>% colnames(.)
## Join PRS and Phenotype data
message('Join PRS and Phenotype data \n')
dat <- left_join(prs, pheno) %>%
select(FID:cohort, sex, status, aaoaae2, apoe4dose, jointPC1:jointPC10) %>%
mutate(status = as.factor(status),
sex = as.factor(sex),
cohort = fct_infreq(cohort))
## Peform logistic regression for each Pvalue threshold
message('RUNNING REGRESSIONS \n')
res <- map(pt, function(x){
select(dat, status, x, aaoaae2, sex, apoe4dose, cohort, jointPC1:jointPC10) %>%
glm(status ~ ., family = 'binomial', data = .) %>%
tidy(.) %>%
mutate(pt = x,
pt = case_when(
str_detect(pt, 'prs.pt_') ~ str_replace(pt, 'prs.pt_', "") %>% as.numeric() %>% as.character(),
str_detect(pt, 'prs.pc') ~ str_replace(pt, 'prs.', ""),
TRUE ~ "NA"
),
exposure = exposure)
}) %>%
bind_rows() %>%
# mutate(pt = as.numeric(pt)) %>%
mutate(model = case_when(
pt == '5e-08' & pt == best.prs ~ 'GWS_best',
pt == '5e-08' ~ "GWS",
pt == best.prs ~ 'best',
pt == "pc1" ~ 'PCA',
TRUE ~ "other"
),
r2 = r2,
window = window) %>%
select(exposure, pt, model, everything())
message('EXPORT \n')
write_tsv(res, snakemake@output[["out"]])
# res %>%
# filter(estimate, str_detect(term, 'prs.pt'))