In [1]:
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(latex2exp))
suppressPackageStartupMessages(library(gridExtra))

In [2]:
# Path to the gzipped, tab-separated variant annotation / filtering table
# that read_annotation_tbl() loads below.
annot.tbl <- '/oak/stanford/groups/mrivas/private_data/ukbb/variant_filtering/variant_filter_table.tsv.gz'


In [3]:
# Read the variant annotation table and derive the helper columns used by
# the rest of the notebook.
#
# Args:
#   annot.tbl: path to a gzipped, tab-separated annotation file. The file
#     must provide at least the columns freq, CHROM, POS, REF, ALT and
#     Consequence.
#
# Returns:
#   A data.frame holding every input column plus
#     MAF               - minor allele frequency, pmin(freq, 1 - freq)
#     variant           - "CHROM:POS:REF:ALT" identifier
#     is_outside_of_MHC - FALSE only for chromosome 6 positions inside
#                         [25477797, 36448354], TRUE elsewhere
#     Csq               - consequence group: "protein-truncating",
#                         "protein-altering" or "non-coding"
read_annotation_tbl <- function(annot.tbl){
    # Consequence terms defining the two coding groups. Every other value
    # (including a missing Consequence) is labelled "non-coding".
    ptv_csqs <- c(
        "frameshift_variant", "splice_donor_variant", "stop_gained",
        "stop_lost", "start_lost", "splice_acceptor_variant"
    )
    pav_csqs <- c(
        "splice_region_variant", "missense_variant",
        "inframe_insertion", "inframe_deletion"
    )

    # Inclusive boundaries of the chromosome 6 interval treated as the MHC.
    mhc_start <- 25477797
    mhc_end   <- 36448354

    fread(
        # shQuote() keeps a path containing spaces or shell metacharacters
        # as a single argument of zcat.
        cmd=paste('zcat', shQuote(annot.tbl)),
        sep='\t', data.table=FALSE
    ) %>%
    mutate(
        MAF = pmin(freq, 1-freq),
        variant = paste(CHROM, POS, REF, ALT, sep=':'),
        # A variant is outside of the MHC unless it sits on chromosome 6
        # within the interval above. NOTE(review): a chromosome code that
        # cannot be coerced to a number yields NA or TRUE here.
        is_outside_of_MHC = !(
            as.numeric(CHROM) == 6 &
            mhc_start <= as.numeric(POS) &
            as.numeric(POS) <= mhc_end
        ),
        Csq = case_when(
            Consequence %in% ptv_csqs ~ "protein-truncating",
            Consequence %in% pav_csqs ~ "protein-altering",
            TRUE ~ "non-coding"
        )
    )
}

In [4]:
# Load the annotation table once; the cells below reuse this data.frame.
annot.arr <- read_annotation_tbl(annot.tbl)

In [5]:
# List the columns available in the annotation table.
colnames(annot.arr)

In [6]:
# Count rare variants (0.01% < MAF < 1%) by MHC status and by whether they
# are coding (PTVs + protein-altering) or non-coding.
annot.arr %>%
    filter(0.0001 < MAF, MAF < 0.01) %>%
    mutate(
        Csq2 = if_else(Csq != 'non-coding', 'PTVs+protein-altering', 'non-coding')
    ) %>%
    count(is_outside_of_MHC, Csq2)


is_outside_of_MHC,Csq2,n
<lgl>,<chr>,<int>
False,non-coding,857
False,PTVs+protein-altering,631
True,non-coding,34599
True,PTVs+protein-altering,41637


In [7]:
# Load the linear-regression summary statistics (*.glm.linear.gz) of the two
# IOP phenotypes into a list keyed by phenotype ID.
IOPs <- list()
for(GBE_ID in c('INI2005254', 'INI2005255')){
    IOP_file <- file.path(
        '/oak/stanford/groups/mrivas/projects/ANGPTL7/ukbb_gwas/white_british',
        paste0('ukb24983_v2_hg19.', GBE_ID, '.genotyped.glm.linear.gz')
    )
    IOPs[[GBE_ID]] <- fread(
        # shQuote() keeps the path a single shell word. sed strips every '#'
        # from the stream (presumably to clean the '#'-prefixed header line).
        cmd=paste0('zcat ', shQuote(IOP_file), ' | sed -e "s/#//g"'),
        sep='\t', data.table=FALSE
    )
}


In [8]:
# Table dimensions before and after removing rows that contain any NA.
print(dim(IOPs[['INI2005254']]))
print(dim(drop_na(IOPs[['INI2005254']])))

[1] 784256     12
[1] 769187     12


In [9]:
# Table dimensions before and after removing rows that contain any NA.
print(dim(IOPs[['INI2005255']]))
print(dim(drop_na(IOPs[['INI2005255']])))


[1] 784256     12
[1] 769187     12


In [107]:
# Load the logistic-regression summary statistics for glaucoma (HC276).
Glaucoma <- fread(
    cmd=paste0(
        'zcat ',
        # shQuote() keeps the path a single shell word even if it ever
        # contains spaces or shell metacharacters.
        shQuote(file.path(
            '/oak/stanford/groups/mrivas/projects/ANGPTL7/ukbb_gwas/white_british',
            paste0('ukb24983_v2_hg19.', 'HC276', '.genotyped.glm.logistic.hybrid.gz')
        )),
        # Strips every '#' from the stream (presumably to clean the header).
        ' | sed -e "s/#//g"'
    ),
    sep='\t', data.table=FALSE
)


In [10]:
# Same rare-variant counts as above, restricted to variants that have a
# complete (NA-free) association result for INI2005254.
annot.arr %>%
    filter(0.0001 < MAF, MAF < 0.01) %>%
    mutate(
        Csq2 = if_else(Csq != 'non-coding', 'PTVs+protein-altering', 'non-coding')
    ) %>%
    select(ID, is_outside_of_MHC, Csq2) %>%
    inner_join(select(drop_na(IOPs[['INI2005254']]), ID), by='ID') %>%
    count(is_outside_of_MHC, Csq2)

is_outside_of_MHC,Csq2,n
<lgl>,<chr>,<int>
False,non-coding,857
False,PTVs+protein-altering,631
True,non-coding,34592
True,PTVs+protein-altering,41590


In [11]:
# Same rare-variant counts as above, restricted to variants that have a
# complete (NA-free) association result for INI2005255.
annot.arr %>%
    filter(0.0001 < MAF, MAF < 0.01) %>%
    mutate(
        Csq2 = if_else(Csq != 'non-coding', 'PTVs+protein-altering', 'non-coding')
    ) %>%
    select(ID, is_outside_of_MHC, Csq2) %>%
    inner_join(select(drop_na(IOPs[['INI2005255']]), ID), by='ID') %>%
    count(is_outside_of_MHC, Csq2)

is_outside_of_MHC,Csq2,n
<lgl>,<chr>,<int>
False,non-coding,857
False,PTVs+protein-altering,631
True,non-coding,34592
True,PTVs+protein-altering,41590


## ANGPTL7

In [12]:
# All annotated variants assigned to ANGPTL7.
filter(annot.arr, Gene_symbol == 'ANGPTL7')

CHROM,POS,REF,ALT,ID,Gene,Consequence,HGVSp,LoF,LoF_filter,⋯,mcpi,gnomad_af,mgi,mgi_notes,all_filters,Gene_symbol,MAF,variant,is_outside_of_MHC,Csq
<int>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<int>,<chr>,<chr>,<chr>,<int>,<chr>,<dbl>,<chr>,<lgl>,<chr>
1,11252357,A,G,rs200058074,ENSG00000171819,missense_variant,ENSP00000366015.3:p.Gln136Arg,,,⋯,0,,,,0,ANGPTL7,0.0005355845,1:11252357:A:G,True,protein-altering
1,11252369,G,A,rs28991002,ENSG00000171819,missense_variant,ENSP00000366015.3:p.Arg140His,,,⋯,0,,,,0,ANGPTL7,0.002532551,1:11252369:G:A,True,protein-altering
1,11253684,G,T,rs28991009,ENSG00000171819,missense_variant,ENSP00000366015.3:p.Gln175His,,,⋯,0,,,,0,ANGPTL7,0.008116199,1:11253684:G:T,True,protein-altering
1,11253688,C,T,rs143435072,ENSG00000171819,stop_gained,ENSP00000366015.3:p.Arg177Ter,HC,,⋯,0,PASS,,,0,ANGPTL7,0.0004063244,1:11253688:C:T,True,protein-truncating
1,11255013,GGCAT,G,Affx-89021330,ENSG00000171819,frameshift_variant,ENSP00000366015.3:p.His326AspfsTer11,HC,,⋯,0,,,,0,ANGPTL7,1.483913e-05,1:11255013:GGCAT:G,True,protein-truncating


In [13]:
# ANGPTL7 variants within the rare-variant window (0.01% < MAF < 1%).
annot.arr %>%
    filter(Gene_symbol == 'ANGPTL7') %>%
    filter(0.0001 < MAF, MAF < 0.01)

CHROM,POS,REF,ALT,ID,Gene,Consequence,HGVSp,LoF,LoF_filter,⋯,mcpi,gnomad_af,mgi,mgi_notes,all_filters,Gene_symbol,MAF,variant,is_outside_of_MHC,Csq
<int>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<int>,<chr>,<chr>,<chr>,<int>,<chr>,<dbl>,<chr>,<lgl>,<chr>
1,11252357,A,G,rs200058074,ENSG00000171819,missense_variant,ENSP00000366015.3:p.Gln136Arg,,,⋯,0,,,,0,ANGPTL7,0.0005355845,1:11252357:A:G,True,protein-altering
1,11252369,G,A,rs28991002,ENSG00000171819,missense_variant,ENSP00000366015.3:p.Arg140His,,,⋯,0,,,,0,ANGPTL7,0.0025325508,1:11252369:G:A,True,protein-altering
1,11253684,G,T,rs28991009,ENSG00000171819,missense_variant,ENSP00000366015.3:p.Gln175His,,,⋯,0,,,,0,ANGPTL7,0.0081161985,1:11253684:G:T,True,protein-altering
1,11253688,C,T,rs143435072,ENSG00000171819,stop_gained,ENSP00000366015.3:p.Arg177Ter,HC,,⋯,0,PASS,,,0,ANGPTL7,0.0004063244,1:11253688:C:T,True,protein-truncating


In [14]:
# Build a compact per-variant summary table for one gene.
#
# Args:
#   IOP_df:       association results with (at least) the columns ID, BETA,
#                 SE and P.
#   annot.arr:    annotation table with (at least) the columns variant, ID,
#                 HGVSp, MAF and Gene_symbol.
#   gene_symbol:  gene whose variants are reported (default 'ANGPTL7').
#   MAF_l, MAF_u: exclusive lower / upper bound on MAF.
#   hgvsp_prefix: literal prefix stripped from HGVSp for display.
#
# Returns:
#   One row per selected variant with the columns variant, rsID, HGVSp,
#   MAF_percent, BETA_str ("BETA [lower, upper]" using BETA +/- 1.96 * SE)
#   and P. Variants without a match in IOP_df are kept (left join).
show_sumstats <- function(IOP_df, annot.arr, gene_symbol = 'ANGPTL7',
                          MAF_l = 0.0001, MAF_u = 0.01,
                          hgvsp_prefix = 'ENSP00000366015.3:'){
    annot.arr %>%
    filter(Gene_symbol == gene_symbol, MAF_l < MAF, MAF < MAF_u) %>%
    # Only the columns that reach the final table are carried along.
    select(variant, ID, HGVSp, MAF) %>%
    left_join(IOP_df, by='ID') %>%
    rename(rsID = ID) %>%
    mutate(
        BETA_CI_l = BETA - 1.96 * SE,
        BETA_CI_u = BETA + 1.96 * SE,
        MAF_percent = sprintf('%.4f%%', MAF * 100),
        # fixed(): the prefix is a literal string, not a regular expression
        # (an unescaped '.' would otherwise match any character).
        HGVSp = str_replace_all(HGVSp, fixed(hgvsp_prefix), ''),
        BETA_str = sprintf('%.4f [%.3f, %.3f]', BETA, BETA_CI_l, BETA_CI_u)
    ) %>%
    select(variant, rsID, HGVSp, MAF_percent, BETA_str, P)
}

In [30]:
# ANGPTL7 summary table for INI2005255.
show_sumstats(IOPs[['INI2005255']], annot.arr)

variant,rsID,HGVSp,MAF_percent,BETA_str,P
<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>
1:11252357:A:G,rs200058074,p.Gln136Arg,0.0536%,"-0.0297 [-0.248, 0.189]",0.789729
1:11252369:G:A,rs28991002,p.Arg140His,0.2533%,"-0.1497 [-0.244, -0.055]",0.00186485
1:11253684:G:T,rs28991009,p.Gln175His,0.8116%,"-0.2004 [-0.253, -0.148]",1.07008e-13
1:11253688:C:T,rs143435072,p.Arg177Ter,0.0406%,"-0.2856 [-0.533, -0.038]",0.0238987


In [31]:
# ANGPTL7 summary table for INI2005254.
show_sumstats(IOPs[['INI2005254']], annot.arr)

variant,rsID,HGVSp,MAF_percent,BETA_str,P
<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>
1:11252357:A:G,rs200058074,p.Gln136Arg,0.0536%,"0.0124 [-0.203, 0.228]",0.910273
1:11252369:G:A,rs28991002,p.Arg140His,0.2533%,"-0.0714 [-0.165, 0.022]",0.133283
1:11253684:G:T,rs28991009,p.Gln175His,0.8116%,"-0.1552 [-0.207, -0.103]",5.9617e-09
1:11253688:C:T,rs143435072,p.Arg177Ter,0.0406%,"-0.1285 [-0.374, 0.117]",0.303964


In [32]:
# Combine the per-variant summaries of both IOP phenotypes into one table
# (one BETA / P column pair per phenotype) and write it as a TSV file.
inner_join(
    show_sumstats(IOPs[['INI2005254']], annot.arr) %>%
        rename('INI2005254_BETA' = 'BETA_str', 'INI2005254_P' = 'P'),
    show_sumstats(IOPs[['INI2005255']], annot.arr) %>%
        rename('INI2005255_BETA' = 'BETA_str', 'INI2005255_P' = 'P'),
    by=c('variant', 'rsID', 'HGVSp', 'MAF_percent')
) %>%
fwrite('Tbl1_INI2005254_INI2005255.tsv', sep='\t')

## Hits

In [33]:
# Extract the associations passing a P-value threshold and annotate them.
#
# Args:
#   IOP_df:    association results with the columns CHROM, POS, ID, REF, ALT,
#              A1, TEST, OBS_CT, BETA, SE, T_STAT and P.
#   annot.arr: annotation table joined on ID; it must provide variant, Csq,
#              Consequence, Gene_symbol, HGVSp, MAF and ld_indep.
#   p_thr:     rows with P <= p_thr are kept (default 0.0001).
#
# Returns:
#   A data.frame sorted by chromosome and position with the columns variant,
#   variant_ID, BETA, SE, P, Csq, Consequence, Gene_symbol, HGVSp, MAF and
#   ld_indep. The value is returned visibly (it is no longer the result of a
#   trailing assignment).
extract_hits <- function(IOP_df, annot.arr, p_thr = 0.0001){
    IOP_df %>%
    filter(as.numeric(P) <= p_thr) %>%
    # Drop association columns that would clash with, or are not needed next
    # to, the annotation columns.
    select(-REF, -ALT, -A1, -TEST, -OBS_CT, -T_STAT) %>%
    # CHROM / POS are taken from the association table only.
    left_join(annot.arr %>% select(-CHROM, -POS), by='ID') %>%
    arrange(as.numeric(CHROM), as.numeric(POS)) %>%
    rename(variant_ID = ID) %>%
    select(variant, variant_ID, BETA, SE, P, Csq, Consequence, Gene_symbol, HGVSp, MAF, ld_indep)
}

In [34]:
# Write the annotated hits for INI2005254 to a TSV file.
fwrite(
    extract_hits(IOPs[['INI2005254']], annot.arr),
    'INI2005254.hits.tsv', sep='\t', row.names=FALSE
)


In [35]:
# Write the annotated hits for INI2005255 to a TSV file.
fwrite(
    extract_hits(IOPs[['INI2005255']], annot.arr),
    'INI2005255.hits.tsv', sep='\t', row.names=FALSE
)


## qq-plot

In [44]:
# Join association results with the annotation of rare coding variants.
#
# Args:
#   IOP_df:    association results; rows containing any NA are discarded.
#   annot.arr: annotation table with the columns ID, MAF, Csq,
#              is_outside_of_MHC, CHROM, POS, REF and ALT.
#
# Returns:
#   Annotation rows with 0.0001 < MAF < 0.01, a Csq other than 'non-coding'
#   and is_outside_of_MHC set, inner-joined on ID with the complete
#   association rows. CHROM, POS, REF and ALT come from IOP_df.
get_IOP_anno_df <- function(IOP_df, annot.arr){
    complete_sumstats <- drop_na(IOP_df)

    # Positional columns are removed so that they do not get duplicated by
    # the join with the association table.
    rare_coding_annot <- annot.arr %>%
        select(-CHROM, -POS, -REF, -ALT) %>%
        filter(is_outside_of_MHC, Csq != 'non-coding', 0.0001 < MAF, MAF < 0.01)

    inner_join(rare_coding_annot, complete_sumstats, by='ID')
}


In [45]:
# Draw a QQ plot of observed versus expected -log10(P).
#
# Args:
#   IOP_df:    association results (must contain a numeric P column).
#   annot.arr: annotation table; both are passed to get_IOP_anno_df(), which
#              selects the variants that enter the plot.
#
# Returns:
#   A ggplot object with one point per variant and the y = x line in red.
qq_plot <- function(IOP_df, annot.arr){
    anno_df <- get_IOP_anno_df(IOP_df, annot.arr)

    # Observed values, largest first; sort() discards missing values.
    observed <- sort(-log10(anno_df[['P']]), decreasing=TRUE)

    # Expected values from evenly spaced probability points, one per
    # observed value; ppoints() is increasing, so -log10 is decreasing too.
    expected <- -log10(ppoints(length(observed)))

    qq_df <- data.frame(
        Observed = observed,
        Expected = expected
    )

    ggplot(qq_df, aes(x = Expected, y = Observed)) +
        geom_point() +
        geom_abline(slope=1, intercept=0, color='red') +
        theme_bw() +
        labs(
            y = TeX('Observed $-\\log_{10}(P)$'),
            x = TeX('Expected $-\\log_{10}(P)$')
        )
}

In [74]:
# Side-by-side QQ plots for the two IOP phenotypes.
# The panel (B) title uses the correct spelling "Goldmann".
qq_plot_g <- arrangeGrob(
    qq_plot(IOPs[['INI2005254']], annot.arr) + labs(title='(A) corneal compensated IOP (INI2005254)'),
    qq_plot(IOPs[['INI2005255']], annot.arr) + labs(title='(B) Goldmann-correlated IOP (INI2005255)'),
    nrow=1
)


In [75]:
# Save the QQ-plot panel as both PNG and PDF.
ggsave(filename='qq.png', plot=qq_plot_g, width=12, height=6)
ggsave(filename='qq.pdf', plot=qq_plot_g, width=12, height=6)


In [36]:
# Lower 95% CI bound in the form estimate - 1.96 * SE
# (presumably estimate 0.66 with SE 0.15 - TODO confirm the source).
0.66 - 1.96 * 0.15

In [37]:
# Upper 95% CI bound in the form estimate + 1.96 * SE
# (presumably estimate 0.66 with SE 0.15 - TODO confirm the source).
0.66 + 1.96 * 0.15

In [38]:
# Lower 95% CI bound in the form estimate - 1.96 * SE
# (presumably estimate 0.61 with SE 0.13 - TODO confirm the source).
0.61 - 1.96 * 0.13

In [39]:
# Upper 95% CI bound in the form estimate + 1.96 * SE
# (presumably estimate 0.61 with SE 0.13 - TODO confirm the source).
0.61 + 1.96 * 0.13

### Manhattan plot

In [52]:
# Add genome-wide cumulative coordinates for a Manhattan plot.
#
# Args:
#   gwasResults: data frame with (at least) the columns CHR and BP.
#
# Returns:
#   gwasResults sorted by CHR and BP with two extra columns:
#     tot   - summed length (largest BP) of all preceding chromosomes
#     BPcum - BP + tot, the position on the concatenated genome axis
compute_gwas_plot_df <- function(gwasResults){
    chr_offsets <- gwasResults %>%
        group_by(CHR) %>%
        # as.numeric(): with an integer BP column the cumulative sum across
        # chromosomes would exceed the integer range and turn into NA.
        summarise(chr_len=as.numeric(max(BP))) %>%
        # Offset of a chromosome = total length of the chromosomes before it.
        mutate(tot=cumsum(chr_len)-chr_len) %>%
        select(-chr_len)

    gwasResults %>%
    # Attach the per-chromosome offset to every variant.
    left_join(chr_offsets, by=c("CHR"="CHR")) %>%
    arrange(CHR, BP) %>%
    mutate(BPcum=BP+tot)
}


In [53]:
# Compute the x-axis tick position and label of every chromosome.
#
# Args:
#   don: data frame with the columns CHR (numeric) and BPcum.
#
# Returns:
#   One row per chromosome with
#     center   - midpoint between the smallest and largest BPcum
#     CHR_plot - tick label; blank for even chromosomes above 15
compute_x_axis_df <- function(don){
    chr_centers <- summarize(
        group_by(don, CHR),
        center = (min(BPcum) + max(BPcum)) / 2
    )

    mutate(
        chr_centers,
        CHR_plot = if_else((CHR <= 15) | (CHR %% 2) != 0, as.character(CHR), "")
    )
}


In [54]:
# Draw a Manhattan plot.
#
# Args:
#   don:       plot data with the columns BPcum, P, repel_label and color.
#   axisdf:    per-chromosome axis table with the columns center (tick
#              position) and CHR_plot (tick label).
#   title_str: plot title.
#
# Returns:
#   A ggplot object: points coloured by `color`, a dashed red line at
#   -log10(P) = 6 and repelled text labels taken from repel_label.
plot_manhattan <- function(don, axisdf, title_str){
    don %>%
    ggplot( aes(x=BPcum, y=-log10(P), label=repel_label) ) +
    geom_point( aes(color=as.factor(color)), alpha=0.8, size=1.3) +
    geom_hline(yintercept=6, color='red', linetype="dashed") +
    ggrepel::geom_text_repel(size=3) +
    scale_x_continuous(
        # Full argument name; `label` only worked through partial matching.
        labels = axisdf$CHR_plot, breaks = axisdf$center
    ) +
    scale_y_continuous(expand = c(0, 0.5) ) +
    theme_bw() +
    theme(
      legend.position="none",
      panel.border = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.grid.minor.x = element_blank()
    ) +
    labs(
        title=title_str,
        x = 'Genomic position (chromosome)',
        y = TeX('$-\\log_{10\\,}P$')
    )
}


In [69]:
# Build the Manhattan plot of one phenotype from its association results.
#
# Args:
#   IOP_df:    association results (CHROM must be numeric).
#   annot.arr: annotation table; both are passed to get_IOP_anno_df().
#
# Returns:
#   A ggplot object with an empty title. Variants with P >= 0.01 are not
#   drawn, and gene symbols are shown for variants that rank within the 30
#   smallest P-values and have P <= 1e-6.
generate_manhattan_plot_from_IOP_df <- function(IOP_df, annot.arr){
    anno_df <- get_IOP_anno_df(IOP_df, annot.arr)

    plot_df <- anno_df %>%
        mutate(
            P = as.numeric(P),
            rankP = rank(P),
            repel_label = if_else((P <= 1e-6) & (rankP <= 30), Gene_symbol, ''),
            color = if_else( (CHROM %% 2) == 1, "1_odd_chrs", "2_even_chrs"),
            POS = as.numeric(POS)
        ) %>%
        # Keep and rename the columns expected by the plotting helpers.
        select(CHR = CHROM, BP = POS, SNP = ID, P, BETA, repel_label, color, MAF) %>%
        # Cumulative positions are computed on all variants, before the
        # weakest associations are removed from the drawing.
        compute_gwas_plot_df() %>%
        filter(P < 1e-2)

    axis_df <- compute_x_axis_df(plot_df)

    plot_manhattan(plot_df, axis_df, '')
}

In [76]:
# Side-by-side Manhattan plots for the two IOP phenotypes.
# The panel (B) title uses the correct spelling "Goldmann".
manhattan_g <- arrangeGrob(
    generate_manhattan_plot_from_IOP_df(IOPs[['INI2005254']], annot.arr) + labs(title='(A) corneal compensated IOP (INI2005254)'),
    generate_manhattan_plot_from_IOP_df(IOPs[['INI2005255']], annot.arr) + labs(title='(B) Goldmann-correlated IOP (INI2005255)'),
    nrow=1
)


In [77]:
# Save the Manhattan-plot panel as both PNG and PDF.
ggsave(filename='manhattan.png', plot=manhattan_g, width=12, height=6)
ggsave(filename='manhattan.pdf', plot=manhattan_g, width=12, height=6)


In [81]:
# Annotated rare coding variants tested for INI2005255 (for inspection).
tmp <- get_IOP_anno_df(IOPs[['INI2005255']], annot.arr)

In [82]:
# Variants whose P-value is below 1e-6.
filter(tmp, P < 1e-6)

ID,Gene,Consequence,HGVSp,LoF,LoF_filter,LoF_flags,LoF_info,consequence_field,f_miss,⋯,POS,REF,ALT,A1,TEST,OBS_CT,BETA,SE,T_STAT,P
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,⋯,<int>,<chr>,<chr>,<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
rs28991009,ENSG00000171819,missense_variant,ENSP00000366015.3:p.Gln175His,,,,,"CSQ=T|intron_variant|MODIFIER|MTOR|ENSG00000198793|Transcript|ENST00000361445|protein_coding||28/57|ENST00000361445.4:c.4253+5631C>A|||||||rs28991009|1||-1||SNV|HGNC|3942|YES|||CCDS127.1|ENSP00000354558|P42345|Q96QW8&B1AKQ2&B1AKP8|UPI000012ABD3|1|||||T:0.0010|T:0|T:0|T:0|T:0.005|T:0|T:0.001589|T:0.006628|T:3.608e-03&A:8.236e-06|T:0.003737&A:8.532e-06|T:0.0006753&A:9.647e-05|T:0.0006062&A:0|T:0&A:0|T:0.0009074&A:0|T:0.006364&A:0|T:0.001155&A:0|T:0&A:0||||||||||||,T|missense_variant|MODERATE|ANGPTL7|ENSG00000171819|Transcript|ENST00000376819|protein_coding|3/5||ENST00000376819.3:c.525G>T|ENSP00000366015.3:p.Gln175His|764|525|175|Q/H|caG/caT|rs28991009|1||1||SNV|HGNC|24078|YES|||CCDS128.1|ENSP00000366015|O43827|F1T0A6|UPI0000035976||deleterious(0)|probably_damaging(1)|PROSITE_profiles:PS51406&hmmpanther:PTHR19143&Pfam_domain:PF00147&Gene3D:3.90.215.10&SMART_domains:SM00186&Superfamily_domains:SSF56496||T:0.0010|T:0|T:0|T:0|T:0.005|T:0|T:0.001589|T:0.006628|T:3.608e-03&A:8.236e-06|T:0.003737&A:8.532e-06|T:0.0006753&A:9.647e-05|T:0.0006062&A:0|T:0&A:0|T:0.0009074&A:0|T:0.006364&A:0|T:0.001155&A:0|T:0&A:0||||||||||||,T|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|ANGPTL7|ENSG00000171819|Transcript|ENST00000476934|processed_transcript|1/3||ENST00000476934.1:n.1G>T||1|||||rs28991009|1||1||SNV|HGNC|24078||||||||||||||T:0.0010|T:0|T:0|T:0|T:0.005|T:0|T:0.001589|T:0.006628|T:3.608e-03&A:8.236e-06|T:0.003737&A:8.532e-06|T:0.0006753&A:9.647e-05|T:0.0006062&A:0|T:0&A:0|T:0.0009074&A:0|T:0.006364&A:0|T:0.001155&A:0|T:0&A:0||||||||||||",0.001406,⋯,11253684,G,T,T,ADD,82114,-0.200434,0.0269646,-7.43322,1.07008e-13
rs150853613,ENSG00000244486,missense_variant,"ENSP00000385589.3:p.Ser549Leu,ENSP00000266214.5:p.Ser554Leu",,,,,"CSQ=A|missense_variant|MODERATE|SCARF2|ENSG00000244486|Transcript|ENST00000266214|protein_coding|10/11||ENST00000266214.5:c.1661C>T|ENSP00000266214.5:p.Ser554Leu|1766|1661|554|S/L|tCg/tTg|rs150853613|1||-1||SNV|HGNC|19869|YES|||CCDS13779.1|ENSP00000266214|Q96GP6||UPI0000135F1C|1|deleterious(0)|benign(0.076)|hmmpanther:PTHR24043&hmmpanther:PTHR24043:SF5&Low_complexity_(Seg):seg||A:0.0008|A:0|A:0|A:0|A:0.002|A:0.002|A:0.0006809|A:0.005233|A:2.800e-03|A:0.00281|A:0.0005789|A:0.002345|A:0|A:0.003085|A:0.003933|A:0.001101|A:0.001457||||||||||||,A|missense_variant|MODERATE|SCARF2|ENSG00000244486|Transcript|ENST00000405555|protein_coding|10/11||ENST00000405555.3:c.1646C>T|ENSP00000385589.3:p.Ser549Leu|1717|1646|549|S/L|tCg/tTg|rs150853613|1||-1||SNV|HGNC|19869||||CCDS46666.1|ENSP00000385589|Q96GP6||UPI000004715D|1|deleterious(0)|benign(0.035)|Low_complexity_(Seg):seg&hmmpanther:PTHR24043&hmmpanther:PTHR24043:SF5||A:0.0008|A:0|A:0|A:0|A:0.002|A:0.002|A:0.0006809|A:0.005233|A:2.800e-03|A:0.00281|A:0.0005789|A:0.002345|A:0|A:0.003085|A:0.003933|A:0.001101|A:0.001457||||||||||||,A|downstream_gene_variant|MODIFIER|KLHL22|ENSG00000099910|Transcript|ENST00000429594|nonsense_mediated_decay||||||||||rs150853613|1|1796|-1|cds_start_NF|SNV|HGNC|25888|||||ENSP00000392268|||UPI0001610F3B|1|||||A:0.0008|A:0|A:0|A:0|A:0.002|A:0.002|A:0.0006809|A:0.005233|A:2.800e-03|A:0.00281|A:0.0005789|A:0.002345|A:0|A:0.003085|A:0.003933|A:0.001101|A:0.001457||||||||||||,A|non_coding_transcript_exon_variant&non_coding_transcript_variant|MODIFIER|SCARF2|ENSG00000244486|Transcript|ENST00000494535|retained_intron|4/5||ENST00000494535.1:n.537C>T||537|||||rs150853613|1||-1||SNV|HGNC|19869|||||||||1|||||A:0.0008|A:0|A:0|A:0|A:0.002|A:0.002|A:0.0006809|A:0.005233|A:2.800e-03|A:0.00281|A:0.0005789|A:0.002345|A:0|A:0.003085|A:0.003933|A:0.001101|A:0.001457||||||||||||",0.0007147,⋯,20781732,G,A,A,ADD,
82193,-0.169967,0.0332241,-5.11577,3.13171e-07


In [102]:
# Draw a "cascade" plot: effect size against minor allele frequency.
#
# Args:
#   gwas_df:      association results with (at least) the columns ID and
#                 BETA.
#   annot.arr:    annotation table with the columns ID, MAF, Gene_symbol,
#                 Csq and ld_indep (logical).
#   MAF_l, MAF_u: exclusive lower / upper bound on MAF (default: keep all).
#
# Returns:
#   A ggplot object with one point per LD-independent variant, coloured by
#   consequence group, MAF on a log10 x axis and BETA limited to
#   [-0.25, 0.25]. ANGPTL7 variants are labelled with the gene symbol.
generate_cascade_plot <- function(gwas_df, annot.arr, MAF_l = 0, MAF_u = 1){
    # Colours are keyed by consequence group so that each group keeps its
    # colour even when another group is absent from the filtered data
    # (unnamed values would be assigned by position and shift).
    csq_colors <- c(
        'non-coding'         = 'gray',
        'protein-altering'   = '#F8766D',
        'protein-truncating' = '#00BFC4'
    )

    gwas_df %>%
    inner_join(annot.arr %>% select(ID, MAF, Gene_symbol, Csq, ld_indep), by='ID') %>%
    filter(MAF_l < MAF, MAF < MAF_u, ld_indep) %>%
    mutate(
        # Only ANGPTL7 variants get a text label.
        ggrepel = if_else(Gene_symbol == 'ANGPTL7', Gene_symbol, "")
    ) %>%
    ggplot(aes(x = MAF, y = BETA, color=Csq, label=ggrepel)) +
    geom_hline(yintercept = 0, linetype="dashed") +
    geom_point() + theme_bw() +
    labs(
        x = 'Minor allele frequency (log-scale)',
        y = 'BETA (SD)',
        color = 'Consequence'
    ) +
    theme(
        strip.text = element_text(size=7),
        legend.position='bottom'
    )+
    scale_color_manual(values=csq_colors) +
    scale_x_continuous(trans="log10", breaks=c(0.01, 0.03, 0.1, 0.3)) +
    # Points with BETA outside of these limits are not drawn.
    scale_y_continuous(limits = c(-.25, .25))+
    ggrepel::geom_text_repel(size=3)
}

In [113]:
# Side-by-side cascade plots of the genome-wide significant (P < 5e-8)
# associations for the two IOP phenotypes.
# The panel (B) title uses the correct spelling "Goldmann".
cascade_g <- arrangeGrob(
    IOPs[['INI2005254']] %>% filter(P < 5e-8) %>% generate_cascade_plot(annot.arr) + labs(title='(A) corneal compensated IOP (INI2005254)'),
    IOPs[['INI2005255']] %>% filter(P < 5e-8) %>% generate_cascade_plot(annot.arr) + labs(title='(B) Goldmann-correlated IOP (INI2005255)'),
    # Optional third panel for glaucoma, currently disabled:
#     Glaucoma %>% filter(P < 5e-8) %>% mutate(BETA = log(OR)) %>%
#     generate_cascade_plot(annot.arr) + labs(title='(C) Glaucoma (HC276)'),
    nrow=1
)


In [114]:
# Save the cascade-plot panel as both PNG and PDF.
ggsave(filename='cascade.png', plot=cascade_g, width=15, height=5)
ggsave(filename='cascade.pdf', plot=cascade_g, width=15, height=5)
