In [1]:
# Attach the packages used throughout this notebook.
# library() is used instead of require(): require() only returns FALSE when a
# package is missing, so the failure would surface later at first use, whereas
# library() stops immediately with a clear error.
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(data.table))
library(latex2exp)


In [37]:
# File locations, all built relative to the current working directory.
wd <- getwd()

# Table read into `traits` further down.
cascade_files <- file.path(wd, 'cascade.input.files.tsv')

# zstd-compressed TSV read into `array_anno_df` further down.
array_hits <- file.path(wd, 'out_v3', 'cascade.array.hits.tsv.zst')

# Output path stems (no extension) for the saved figures, keyed by plot name.
plot_filehead <- lapply(
    c(PAVs = "cascade.PAVs", PTVs = "cascade.PTVs"),
    function(stem) file.path(wd, 'out_v3', stem)
)


In [3]:
# Read the table at `cascade_files`; its `name` column is used by the checks below.
traits <- fread(cascade_files)

# Read the zstd-compressed, tab-separated hits table by streaming it through
# `zstdcat`. Fail early with a clear message if the file is absent, because a
# failing shell command otherwise only surfaces as an obscure fread() error.
stopifnot(file.exists(array_hits))

# shQuote() protects the path from word-splitting and shell metacharacters
# (e.g. a working directory containing spaces).
array_anno_df <- fread(cmd = paste('zstdcat', shQuote(array_hits)), sep = '\t')


In [29]:
# Empty container; the ggplot objects are stored in it by name below.
plots <- vector("list", 0L)

In [30]:
# Scatter of Effect against maf (log10 x-axis) for rows with
# Csq == 'protein-truncating' where `ld_indep` and `is_outside_of_MHC` are
# TRUE, drawn as one facet per `name`. Points with |Effect| >= 0.1 are
# coloured differently and labelled with their Gene_symbol.
plots[['PTVs']] <- array_anno_df %>%
    filter(Csq == 'protein-truncating', ld_indep, is_outside_of_MHC) %>%
    mutate(
        # The comparison is already logical; no if_else(..., TRUE, FALSE) needed.
        outlier = abs(Effect) >= .1,
        # Only outliers carry a label; every other point gets an empty string.
        ggrepel = if_else(outlier, Gene_symbol, "")
    ) %>%
    ggplot(aes(x = maf, y = Effect, color = outlier, label = ggrepel)) +
    geom_hline(yintercept = 0, linetype = "dashed") +
    geom_point() +
    theme_bw() +
    labs(
        title = TeX('Protein-truncating variant associations ($p \\,<\\, 5x10^{-9}$)'),
        x = 'Minor allele frequency (log-scale)',
        y = 'BETA'
    ) +
    theme(
        strip.text = element_text(size = 7),
        legend.position = "none"
    ) +
    scale_color_brewer(palette = "Dark2") +
    # scale_x_log10() replaces scale_x_continuous(trans = "log10"); the `trans`
    # argument is deprecated in ggplot2 >= 3.5.0, while this form works on both
    # older and newer releases.
    scale_x_log10(breaks = 10^c(-4, -2, 0)) +
    facet_wrap(
        ~name,
        ncol = 6,
        strip.position = "bottom",
        labeller = label_wrap_gen(17)
    ) +
    ggrepel::geom_text_repel(size = 2, fontface = 3)


In [44]:
# Scatter of Effect against maf (log10 x-axis) for rows with
# Csq == 'protein-altering' where `ld_indep` and `is_outside_of_MHC` are TRUE,
# drawn as one facet per `name`. Points with |Effect| >= 0.1 are coloured
# differently; only those that also rank below 7 by |Effect| within their
# `name` are labelled with their Gene_symbol.
plots[['PAVs']] <- array_anno_df %>%
    filter(Csq == 'protein-altering', ld_indep, is_outside_of_MHC) %>%
    group_by(name) %>%
    # Rank 1 = largest |Effect| within each `name` (ties get averaged ranks).
    mutate(rank_abs_effect = rank(-abs(Effect))) %>%
    ungroup() %>%
    mutate(
        # The comparison is already logical; no if_else(..., TRUE, FALSE) needed.
        outlier = abs(Effect) >= .1,
        # Label an outlier only if its within-`name` rank is below 7.
        ggrepel = if_else(outlier & (rank_abs_effect < 7), Gene_symbol, "")
    ) %>%
    ggplot(aes(x = maf, y = Effect, color = outlier, label = ggrepel)) +
    geom_hline(yintercept = 0, linetype = "dashed") +
    geom_point() +
    theme_bw() +
    labs(
        title = TeX('Protein-altering variant associations ($p \\,<\\, 5x10^{-9}$)'),
        x = 'Minor allele frequency (log-scale)',
        y = 'BETA'
    ) +
    theme(
        strip.text = element_text(size = 7),
        legend.position = "none"
    ) +
    scale_color_brewer(palette = "Dark2") +
    # scale_x_log10() replaces scale_x_continuous(trans = "log10"); the `trans`
    # argument is deprecated in ggplot2 >= 3.5.0, while this form works on both
    # older and newer releases.
    scale_x_log10(breaks = 10^c(-4, -2, 0)) +
    facet_wrap(
        ~name,
        ncol = 6,
        strip.position = "bottom",
        labeller = label_wrap_gen(17)
    ) +
    ggrepel::geom_text_repel(size = 2, fontface = 3)


In [45]:
# Save each stored figure as a 10 x 10 PNG and PDF, using the matching
# path stem from `plot_filehead` plus the file extension.
for (variant_class in c('PTVs', 'PAVs')) {
    figure <- plots[[variant_class]]
    for (extension in c('png', 'pdf')) {
        out_path <- paste0(plot_filehead[[variant_class]], '.', extension)
        ggsave(out_path, plot = figure, width = 10, height = 10)
    }
}

In [36]:
# Names in `traits` that have no row with Csq == 'protein-truncating' in
# `array_anno_df`.
# NOTE(review): unlike the PTVs plot, this check does not filter on `ld_indep`
# or `is_outside_of_MHC` -- confirm that is intended.
setdiff(
    pull(traits, name),
    array_anno_df %>%
        filter(Csq == 'protein-truncating') %>%
        pull(name) %>%
        unique()
)

In [34]:
# Names in `traits` that have no row with Csq == 'protein-altering' in
# `array_anno_df`.
# NOTE(review): unlike the PAVs plot, this check does not filter on `ld_indep`
# or `is_outside_of_MHC` -- confirm that is intended.
setdiff(
    pull(traits, name),
    array_anno_df %>%
        filter(Csq == 'protein-altering') %>%
        pull(name) %>%
        unique()
)