In [1]:
suppressWarnings(suppressPackageStartupMessages({
    library(tidyverse)
    library(data.table)
}))


In [2]:
# Read a compressed tab-separated file by streaming it through a
# decompression command and handing the output to fread().
#
# Args:
#   file: path to the compressed file. A leading '~' is expanded by R and the
#         path is then shell-quoted, so spaces and shell metacharacters in the
#         path are passed literally to the decompressor (no shell expansion
#         of variables or globs happens inside `file`).
#   zcat: decompression command that writes to stdout (default 'zcat').
#         It is inserted verbatim, so it may carry its own options.
#
# Returns:
#   Whatever fread() returns for the decompressed, tab-delimited stream.
fread_compressed <- function(file, zcat='zcat'){
    # Quote the path so that e.g. 'my dir/hits.tsv.gz' stays a single argument.
    quoted_file <- shQuote(path.expand(file))
    fread(cmd=paste(zcat, quoted_file, sep=' '), sep='\t')
}


In [3]:
# Load the array-hit table; the .zst file is decompressed with zstdcat.
array_hits_file <- '@@@@@@/projects/biomarkers/cascade/out_v3/cascade.array.hits.tsv.zst'
array_hits <- fread_compressed(array_hits_file, 'zstdcat')


In [4]:
# Print the number of distinct variant IDs among rows whose consequence is
# protein-altering or protein-truncating and that carry both the ld_indep
# and is_outside_of_MHC flags.
array_hits %>%
    filter(Csq %in% c('protein-altering', 'protein-truncating')) %>%
    filter(ld_indep, is_outside_of_MHC) %>%
    distinct(ID) %>%
    nrow() %>%
    print()

[1] 632


We tested 632 protein-altering or protein-truncating variants (restricted to variants flagged as LD-independent and located outside of the MHC region).

In [5]:
# Path to the gzip-compressed TSV of PheWAS results for the array hits;
# it is loaded into phewas_df with fread_compressed().
phewas_file <- '@@@@@@/projects/biomarkers/phewas/v2/array.hits.phewas.tsv.gz'


In [6]:
# Read the PheWAS table using the default decompressor (zcat).
phewas_df <- fread_compressed(phewas_file)


In [7]:
# Summarise the associations in a PheWAS table that pass a p-value cutoff.
#
# Args:
#   df:    table with columns P, GBE_ID and Variant_ID.
#   p_thr: a row is kept when as.numeric(P) is strictly below this value
#          (default 1e-7).
#
# Returns:
#   Unnamed vector of three counts, in this order: distinct GBE_ID values,
#   distinct Variant_ID values, and retained rows.
count_phewas <- function(df, p_thr=1e-7){
    significant <- filter(df, as.numeric(P) < p_thr)
    n_phenotypes <- nrow(distinct(significant, GBE_ID))
    n_variants <- nrow(distinct(significant, Variant_ID))
    n_associations <- nrow(significant)
    c(n_phenotypes, n_variants, n_associations)
}

In [8]:
# Print a one-line summary of the counts produced by count_phewas().
#
# Args:
#   cnts: vector whose elements are, in order, the number of phenotypes,
#         the number of variants and the number of associations.
#
# Returns:
#   The formatted summary string, invisibly (the value of print()).
show_counts <- function(cnts){
    n_phenotypes <- cnts[1]
    n_variants <- cnts[2]
    n_associations <- cnts[3]
    summary_line <- sprintf(
        '%d associations across %d phenotypes and %d variants',
        n_associations, n_phenotypes, n_variants
    )
    print(summary_line)
}

In [9]:
# Summary line for the whole PheWAS table at the default p-value cutoff.
show_counts(count_phewas(phewas_df))


[1] "61 associations across 28 phenotypes and 34 variants"


In [12]:
# Tally associations by effect direction: '+' when OR > 1, '-' otherwise.
phewas_df %>%
    count(sign = if_else(OR > 1, '+', '-'))

sign,n
<chr>,<int>
-,26
+,35


In [10]:
# Summary lines per consequence class: protein-altering first,
# then protein-truncating.
show_counts(count_phewas(filter(phewas_df, Csq == 'protein-altering')))
show_counts(count_phewas(filter(phewas_df, Csq == 'protein-truncating')))


[1] "59 associations across 26 phenotypes and 32 variants"
[1] "2 associations across 2 phenotypes and 2 variants"


In [12]:
# List the protein-truncating associations that carry the is_outside_of_MHC flag.
phewas_df %>%
    filter(is_outside_of_MHC) %>%
    filter(Csq == 'protein-truncating') %>%
    select(CHROM, POS, Variant_ID, GBE_ID, GBE_short_name, OR, SE, P)


CHROM,POS,Variant_ID,GBE_ID,GBE_short_name,OR,SE,P
<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
1,152280023,rs138726443,HC261,Eczema/dermatitis,1.79414,0.0750596,6.83663e-15
2,227917083,rs35138315,HC32,Other renal/kidney problem,6.8583,0.267987,6.72522e-13


In [11]:
# Ten variants (with their gene symbol) that have the most associations.
phewas_df %>%
    count(Variant_ID, Gene_symbol, sort = TRUE) %>%
    head(10)

Variant_ID,Gene_symbol,n
<chr>,<chr>,<int>
rs2476601,PTPN22,7
rs148783236,USP8,4
rs913455,CARMIL1,4
Affx-20090007,ABCG8,3
rs1229984,ADH1B,3
rs146125856,USP8,3
rs11244035,OBP2B,2
rs137853336,HNF4A,2
rs1800961,HNF4A,2
rs2282143,SLC22A1,2


In [12]:
# Ten phenotypes (ID and short name) that have the most associations.
phewas_df %>%
    count(GBE_ID, GBE_short_name, sort = TRUE) %>%
    head(10)

GBE_ID,GBE_short_name,n
<chr>,<chr>,<int>
HC221,Diabetes,6
HC215,Hypertension,5
HC219,Hypothyroidism/myxoedema,5
HC188,Gallstones,4
HC326,Heart attack (MI),4
HC132,Angina,3
HC225,Cholecystitis,3
HC55,Hyperthyroidism/thyrotoxicosis,3
BIN_FC1006152,DVT diagnosed by doctor,2
BIN_FC11006152,Blood clot or DVT diagnosed by doctor,2


In [13]:
# All associations of rs2476601, ordered by ascending p-value.
phewas_df %>%
    filter(Variant_ID == 'rs2476601') %>%
    select(CHROM, POS, Variant_ID, GBE_ID, GBE_short_name, OR, SE, P, Csq, Gene_symbol) %>%
    arrange(P)


CHROM,POS,Variant_ID,GBE_ID,GBE_short_name,OR,SE,P,Csq,Gene_symbol
<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
1,114377568,rs2476601,HC219,Hypothyroidism/myxoedema,0.678304,0.0153792,1.4822600000000001e-140,protein-altering,PTPN22
1,114377568,rs2476601,RH130,Diabetes Mellitus Type 1,0.69831,0.0427625,4.56646e-17,protein-altering,PTPN22
1,114377568,rs2476601,HC55,Hyperthyroidism/thyrotoxicosis,0.748516,0.0349584,1.17186e-16,protein-altering,PTPN22
1,114377568,rs2476601,HC430,Rheumatoid arthritis,0.835827,0.0295014,1.21054e-09,protein-altering,PTPN22
1,114377568,rs2476601,cancer1003,Skin cancer,1.11511,0.0181657,2.00078e-09,protein-altering,PTPN22
1,114377568,rs2476601,cancer1060,Non-melanoma skin cancer,1.11881,0.0193796,6.91577e-09,protein-altering,PTPN22
1,114377568,rs2476601,HC221,Diabetes,0.910577,0.0164496,1.23522e-08,protein-altering,PTPN22


In [17]:
# Raw counts (phenotypes, variants, associations) for rows flagged is_rare.
print(count_phewas(filter(phewas_df, is_rare)))

[1] 5 5 6


In [20]:
# Associations for rows flagged is_rare, ordered by ascending p-value.
phewas_df %>%
    filter(is_rare) %>%
    select(CHROM, POS, Variant_ID, GBE_ID, GBE_short_name, OR, SE, P, Csq, Gene_symbol) %>%
    arrange(P)


CHROM,POS,Variant_ID,GBE_ID,GBE_short_name,OR,SE,P,Csq,Gene_symbol
<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
1,152280023,rs138726443,HC261,Eczema/dermatitis,1.79414,0.0750596,6.83663e-15,protein-truncating,FLG
2,227917083,rs35138315,HC32,Other renal/kidney problem,6.8583,0.267987,6.72522e-13,protein-truncating,COL4A4
7,87060844,rs45575636,HC188,Gallstones,1.37527,0.0558357,1.14997e-08,protein-altering,ABCB4
5,14751305,rs146886108,HC221,Diabetes,0.659806,0.0728634,1.15191e-08,protein-altering,ANKH
20,43042354,rs137853336,BIN_FC1006148,Eye problems/disorders Diabetic eye disease,9.6035,0.408779,3.13243e-08,protein-altering,HNF4A
20,43042354,rs137853336,HC221,Diabetes,3.77762,0.243364,4.72566e-08,protein-altering,HNF4A
