In [1]:
suppressWarnings(suppressPackageStartupMessages({
    library(tidyverse)
    library(data.table)
}))


In [84]:
# Input table for the main plot (judging by the path: a zstd-compressed PLINK
# `.hardy` output file).
hardy_f   <- '/oak/stanford/groups/mrivas/ukbb24983/array-combined/annotation/afreq_20201012/plink_output/ukb24983_cal_hla_cnv.white_british.hardy.zst'
# Companion table (plotted as 'chr X' below): same path with '.x' inserted
# before the extension. The dot is escaped so that only a literal '.zst'
# suffix is rewritten, not any character followed by 'zst'.
hardy_x_f <- sub('\\.zst$', '.x.zst', hardy_f)


In [3]:
# Choose the shell command that streams a file to stdout, based on its suffix:
#   '.zst' -> 'zstdcat', '.gz' -> 'zcat', anything else -> 'cat'.
# Vectorised over `f` (character vector of file paths); returns one command
# name per element.
cat_or_zcat <- function(f){
    is_zst <- endsWith(f, '.zst')
    is_gz  <- endsWith(f, '.gz')
    plain_or_gz <- ifelse(is_gz, 'zcat', 'cat')
    ifelse(is_zst, 'zstdcat', plain_or_gz)
}

# Read a (possibly compressed) table through a shell decompressor and return
# it with the '#CHROM' column renamed to 'CHROM'.
#
# f: path to the input file; the decompressor is picked by cat_or_zcat(f).
# Returns whatever fread() returns, with '#CHROM' read as character so that
# chromosome labels are not coerced to numbers.
fread_CHROM <- function(f){
    # Quote the path before handing it to the shell so that spaces or shell
    # metacharacters in it are taken literally. path.expand() keeps a leading
    # '~' working, since the shell no longer expands it once quoted.
    reader_cmd <- paste(cat_or_zcat(f), shQuote(path.expand(f)))
    fread(cmd = reader_cmd, colClasses = c('#CHROM' = 'character')) %>%
        rename('CHROM' = '#CHROM')
}


In [82]:
# Draw the cumulative fraction of rows as a function of -log10(MIDP).
#
# df: data frame with a numeric `MIDP` column.
# Returns a ggplot object: a black line of cumsum(bin count) / nrow(df) over
# 100 bins, x restricted to [0, 20], y restricted to [0.5, 1], with a gray
# reference line at y = 1 and a red reference line at x = 7 (P = 1e-7).
plot_cumsum <- function(df){
    n_total <- nrow(df)

    # Adding the smallest positive double keeps MIDP == 0 from becoming Inf.
    scored <- mutate(df, log10P = -log10(MIDP + .Machine$double.xmin))

    # All text elements share the same size.
    text_16 <- element_text(size = 16)

    # NOTE(review): `..count..` is the legacy spelling of after_stat(count);
    # it is kept as-is so the function still works on older ggplot2 installs.
    cumulative_line <- stat_bin(
        aes(y = cumsum(..count..) / n_total),
        geom = "line", color = "black", bins = 100
    )

    ggplot(scored, aes(x = log10P)) +
        cumulative_line +
        xlim(0, 20) +
        ylim(.5, 1) +
        labs(
            x = latex2exp::TeX('HWE (midp) -log_{10}(P)'),
            y = 'cumsum(n) / n '
        ) +
        theme_bw() +
        theme(
            axis.text    = text_16,
            axis.title   = text_16,
            legend.text  = text_16,
            legend.title = text_16
        ) +
        geom_hline(yintercept = 1, color = 'gray') +
        geom_vline(xintercept = 7, color = 'red')
}


In [4]:
# Load both tables: the main file and its '.x' companion.
hardy_df   <- fread_CHROM(hardy_f)
hardy_x_df <- fread_CHROM(hardy_x_f)


In [88]:
# Write the side-by-side figure (autosomes | chr X) to hwe_midp_plot.png and
# hwe_midp_plot.pdf in the working directory.
# Warnings are suppressed for the whole block -- presumably to hide ggplot2's
# messages about values dropped by the xlim()/ylim() ranges in plot_cumsum();
# TODO confirm nothing else is being hidden.
suppressWarnings(suppressPackageStartupMessages({
    # The two-panel layout does not depend on the output format, so build it
    # once instead of re-creating both plots for every extension.
    hwe_panels <- gridExtra::arrangeGrob(
        plot_cumsum(hardy_df)   + labs(title='autosomes'),
        plot_cumsum(hardy_x_df) + labs(title='chr X'),
        ncol=2
    )
    for(ext in c('png', 'pdf')){
        ggsave(
            sprintf('hwe_midp_plot.%s', ext),
            hwe_panels,
            width=12, height=6
        )
    }
}))
