In [1]:
library(tidyverse)
library(data.table)

── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.4.2     ✔ dplyr   0.7.4
✔ tidyr   0.7.2     ✔ stringr 1.2.0
✔ readr   1.1.1     ✔ forcats 0.2.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Attaching package: ‘data.table’

The following objects are masked from ‘package:dplyr’:

    between, first, last

The following object is masked from ‘package:purrr’:

    transpose



In [29]:
library(VennDiagram)

Loading required package: grid
Loading required package: futile.logger


In [2]:
# Input tables (relative paths). file_gwas and file_clump are loaded with
# read_plink(); file_snpnet is loaded with read_snpnet().
file_gwas <-   '../private_output/vanilla_PRS/HC_20190302/2_GWAS/ukb16698_v2.HC276.PHENO1.glm.logistic.hybrid.gz'
file_clump <-  '../private_output/vanilla_PRS/HC_20190302/4_clumped_GWAS/1e-3/ukb16698_v2.HC276.PHENO1.glm.logistic.hybrid.gz'
file_snpnet <- '../private_output/snpnet_PRS/HC_20190303/3_snpnet/HC276.tsv.gz'


In [33]:
# P-value cutoff applied to df_gwas before its IDs are collected:
# 0.05 divided by 784256 (presumably the number of tested variants -- TODO confirm).
GWAS_Bonferroni <- 0.05 / 784256

In [24]:
# Read a gzip-compressed snpnet result table and keep only the rows whose
# BETA is non-zero.
#
# Args:
#   file: path to the gzipped table; a leading "~" is expanded.
# Returns:
#   A plain data.frame (data.table=FALSE) with the rows where BETA != 0.
#   dplyr::filter() also drops rows whose BETA is NA.
read_snpnet <- function(file){
    # Quote the (tilde-expanded) path so that spaces or shell metacharacters
    # in it cannot break, or inject into, the shell pipeline given to fread().
    # sed strips a leading '#' from every line (presumably to expose the
    # header line as column names -- confirm against the file format).
    cmd <- paste0('zcat <', shQuote(path.expand(file)), ' | sed -e "s/^#//g"')
    fread(cmd, data.table=FALSE) %>%
        filter(BETA != 0)
}

In [20]:
# Read a gzip-compressed summary-statistics table into a plain data.frame.
#
# Args:
#   file: path to the gzipped table; a leading "~" is expanded.
# Returns:
#   A plain data.frame (data.table=FALSE) with every row of the table.
read_plink <- function(file){
    # Quote the (tilde-expanded) path so that spaces or shell metacharacters
    # in it cannot break, or inject into, the shell pipeline given to fread().
    # sed strips a leading '#' from every line (presumably to expose the
    # header line as column names -- confirm against the file format).
    cmd <- paste0('zcat <', shQuote(path.expand(file)), ' | sed -e "s/^#//g"')
    fread(cmd, data.table=FALSE)
}

In [52]:
# Return the ID column of `df` as a plain vector.
get_IDs <- function(df){
    id_only <- select(df, ID)
    id_only[["ID"]]
}

In [55]:
# Load the table stored at file_gwas.
df_gwas <- file_gwas %>% read_plink()

In [35]:
# Load the table stored at file_clump.
df_clump <- file_clump %>% read_plink()

In [102]:
# Preview the first rows of the clumped table.
head(df_clump)

CHROM,POS,ID,REF,ALT,A1,FIRTH?,TEST,OBS_CT,OR,SE,Z_STAT,P
1,1115461,rs116383664,T,C,T,N,ADD,259332,2.14105,0.224232,3.39512,0.000685993
1,1337334,rs1171,T,G,T,N,ADD,269549,0.882804,0.0345877,-3.60394,0.000313424
1,5643543,rs61762122,A,G,A,N,ADD,268882,0.897106,0.0295289,-3.67711,0.000235891
1,5645443,rs4845804,A,G,A,N,ADD,246525,0.928359,0.0224432,-3.31222,0.000925576
1,11253684,rs28991009,T,G,T,N,ADD,269394,0.594745,0.148717,-3.49404,0.000475774
1,14978743,rs12568088,A,G,A,N,ADD,269289,1.0804,0.0233246,3.31554,0.000914649


In [36]:
# Load the snpnet table (read_snpnet keeps only rows with non-zero BETA).
df_snpnet <- file_snpnet %>% read_snpnet()

In [38]:
# Rows x columns of the clumped table.
dim(df_clump)

In [37]:
# Rows x columns of the full GWAS table.
dim(df_gwas)

In [39]:
# Rows x columns of the snpnet table.
dim(df_snpnet)

In [63]:
# Draw a three-set Venn diagram of variant IDs and write it to test.png.
venn.diagram(
    # Sets are listed in the same order as category.names.
    x = list(
        get_IDs(df_snpnet),
        get_IDs(filter(df_gwas, P <= GWAS_Bonferroni)),
        get_IDs(df_clump)
    ),
    category.names = c("snpnet", "GWAS", "LD clumping"),
    # Output file settings.
    filename = "test.png",
    output = TRUE,
    imagetype = "png",
    height = 480,
    width = 480,
    resolution = 300,
    compression = "lzw",
    # Circle appearance.
    lwd = 2,
    lty = "blank",
    fill = c("yellow", "purple", "green"),
    # Count labels.
    cex = 1,
    fontface = "bold",
    fontfamily = "sans",
    # Category labels.
    cat.cex = 0.6,
    cat.fontface = "bold",
    cat.default.pos = "outer",
    cat.pos = c(-27, 27, 135),
    cat.dist = c(0.055, 0.055, 0.085),
    cat.fontfamily = "sans",
    rotation = 1
)

In [62]:
# Number of IDs in each element of the list returned by calculate.overlap()
# for the snpnet, GWAS (P <= GWAS_Bonferroni) and clumped ID sets.
lapply(
    calculate.overlap(
        list(
            get_IDs(df_snpnet),
            get_IDs(filter(df_gwas, P <= GWAS_Bonferroni)),
            get_IDs(df_clump)
        )
    ),
    length
)

In [78]:
# Draw the same three-set Venn diagram of variant IDs, written to foo.png,
# additionally passing an all-zero area.vector.
venn.diagram(
    # Sets are listed in the same order as category.names.
    x = list(
        get_IDs(df_snpnet),
        get_IDs(filter(df_gwas, P <= GWAS_Bonferroni)),
        get_IDs(df_clump)
    ),
    category.names = c("snpnet", "GWAS", "LD clumping"),
    # Output file settings.
    filename = "foo.png",
    output = TRUE,
    imagetype = "png",
    height = 480,
    width = 480,
    resolution = 300,
    compression = "lzw",
    # Circle appearance.
    lwd = 2,
    lty = "blank",
    fill = c("yellow", "purple", "green"),
    # Count labels.
    cex = 1,
    fontface = "bold",
    fontfamily = "sans",
    # Category labels.
    cat.cex = 0.6,
    cat.fontface = "bold",
    cat.default.pos = "outer",
    cat.pos = c(-27, 27, 135),
    cat.dist = c(0.055, 0.055, 0.085),
    cat.fontfamily = "sans",
    rotation = 1,
    # NOTE(review): area.vector appears to be honoured only together with
    # direct.area = TRUE, which is not set here -- confirm the intent.
    area.vector = c(0, 0, 0, 0, 0, 0, 0)
)


In [100]:
# Count the IDs in each of the seven regions of the three-set Venn diagram
# (A = snpnet, B = GWAS hits with P <= GWAS_Bonferroni, C = LD clumping).
#
# The ID vectors are de-duplicated first: intersect() returns unique elements,
# so combining its lengths with the raw length of a vector that contains a
# repeated ID would miscount the "only" regions.
sA <- df_snpnet %>% get_IDs() %>% unique()
sB <- df_gwas %>% filter(P <= GWAS_Bonferroni) %>% get_IDs() %>% unique()
sC <- df_clump %>% get_IDs() %>% unique()

# Pairwise and triple intersections.
sAB <- intersect(sA, sB)
sAC <- intersect(sA, sC)
sBC <- intersect(sB, sC)
sABC <- intersect(sAB, sC)

# Inclusion-exclusion: IDs found in exactly one set. The triple intersection
# is subtracted twice (once per pairwise term), so it is added back once.
nA <- length(sA) - length(sAB) - length(sAC) + length(sABC)
nB <- length(sB) - length(sAB) - length(sBC) + length(sABC)
nC <- length(sC) - length(sBC) - length(sAC) + length(sABC)

# IDs shared by exactly two sets, and by all three.
nAB <- length(sAB) - length(sABC)
nBC <- length(sBC) - length(sABC)
nAC <- length(sAC) - length(sABC)
nABC <- length(sABC)

In [101]:
# Region counts in the order: A only, B only, C only, A&B only, B&C only,
# A&C only, A&B&C.
print(c(nA, nB, nC, nAB, nBC, nAC, nABC))

[1]  888   18 1002    4    8   80   10


In [66]:
# Number of IDs shared by the snpnet set and the GWAS hits
# (P <= GWAS_Bonferroni).
length(
    intersect(
        get_IDs(df_snpnet),
        get_IDs(filter(df_gwas, P <= GWAS_Bonferroni))
    )
)

In [70]:
# Number of IDs present in all three sets: snpnet, GWAS hits
# (P <= GWAS_Bonferroni) and the clumped table. Reduce() folds intersect()
# left to right, i.e. intersect(intersect(snpnet, gwas), clump).
length(
    Reduce(
        intersect,
        list(
            get_IDs(df_snpnet),
            get_IDs(filter(df_gwas, P <= GWAS_Bonferroni)),
            get_IDs(df_clump)
        )
    )
)