In [1]:
suppressWarnings(suppressPackageStartupMessages({
    library(tidyverse)
    library(data.table)
    library(corrplot)
}))


In [2]:
# Input table read by fread() in the next cell.
in_f <- "ldscrg.tsv"
# Phenotype metadata table, later joined on its GBE_ID column.
phenos_f <- "../phenotypes.txt"
# File name for the correlation plot.
plot_png_f <- "rg.png"


In [3]:
# Phenotype metadata (joined later on its GBE_ID column).
phenos <- fread(phenos_f)

# Prefix/suffix stripped from the sumstats file names in p1/p2.
# The dots are escaped so they match a literal '.' rather than any character.
sumstats_affix <- "^ukb24983_v2_hg19\\.|\\.array-combined\\.sumstats\\.gz$"

# Read the pairwise table, reduce p1/p2 to the bare name left after stripping
# the directory and the file-name prefix/suffix, drop every pair that involves
# an 'RH*' entry, and add `rgg`: rg clipped to [-1, 1]
# (NA stays NA because if_else() propagates a missing condition).
df <- fread(in_f) %>%
    mutate(
        p1 = str_replace_all(basename(p1), sumstats_affix, ""),
        p2 = str_replace_all(basename(p2), sumstats_affix, "")
    ) %>%
    filter(
        !str_detect(p1, "^RH"),
        !str_detect(p2, "^RH")
    ) %>%
    mutate(rgg = if_else(abs(rg) > 1, sign(rg), rg))


In [4]:
# Number of rows and columns in the cleaned pairwise table.
dim(df)

In [8]:
# For the rows whose clipped rg is missing, count how often each ID appears
# (in either the p1 or the p2 position) and list those seen more than 10
# times, most frequent first.
df %>%
    filter(is.na(rgg)) %>%
    select(p1, p2) %>%
    # pivot_longer() replaces the superseded gather(); stacks p1/p2 into `val`.
    pivot_longer(everything(), names_to = "col", values_to = "val") %>%
    count(val) %>%
    arrange(desc(n)) %>%
    filter(n > 10)

val,n
<chr>,<int>
HC107,338
HC209,336
HC74,336
HC405,314


In [9]:
# Peek at the first rows where HC107 is the first member of the pair.
head(filter(df, p1 == "HC107"))

p1,p2,rg,se,z,p,h2_obs,h2_obs_se,h2_int,h2_int_se,gcov_int,gcov_int_se,rgg
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
HC107,BIN20406,,,,,0.4265,0.0863,0.9787,0.0057,0.0032,0.0039,
HC107,BIN21068,,,,,0.0829,0.0409,0.9639,0.0182,0.0093,0.0043,
HC107,BIN1020483,,,,,0.0061,0.0016,0.9972,0.006,0.0204,0.0048,
HC107,FH1262,,,,,0.0056,0.0018,0.9898,0.0058,0.0024,0.0038,
HC107,HC0,,,,,0.0067,0.0017,0.9817,0.0059,0.0173,0.0044,
HC107,HC1,,,,,0.0018,0.0015,0.9986,0.006,0.0087,0.0042,


In [10]:
# Wide p1 x p2 matrix of clipped rg values. Missing rg and absent pairs are
# both set to 0 so that corrplot's hierarchical clustering gets a complete
# numeric matrix. Built before the device is opened so that a failure here
# does not leave a dangling graphics device.
rg_mat <- df %>%
    replace_na(list(rgg = 0)) %>%
    dcast(p1 ~ p2, value.var = "rgg", fill = 0) %>%
    column_to_rownames("p1") %>%
    as.matrix()

# Write the plot to the path configured in `plot_png_f` (previously the same
# file name was hard-coded here). `finally` guarantees the PNG device is
# closed even when corrplot() raises an error.
png(file = plot_png_f, width = 2400, height = 2400, units = "px", family = "Helvetica")
invisible(tryCatch(
    corrplot(
        rg_mat,
        order = "hclust",
        addrect = 100
    ),
    finally = dev.off()
))

In [13]:
# Scratch arithmetic: half of 28561 (= 169^2).
# Presumably 28561 is the size of the full pairwise table and this approximates
# the number of unordered pairs — TODO confirm.
28561 / 2

In [14]:
# Scratch arithmetic: 0.05 divided by 15000 (about 3.3e-6).
# NOTE(review): looks like a Bonferroni-style cutoff for ~15000 tests, but the
# table further down filters on p < 5e-6 — confirm which threshold is intended.
0.05 /15000

In [19]:
# Table of pairs with p < 5e-6, smallest p first.
# `p1 < p2` keeps only rows whose first ID sorts before the second, which
# drops self-pairs and one of the two orderings when both are present.
# Each ID is then annotated with its Name and Cases from `phenos`.
df %>%
    filter(p1 < p2, p < 5e-6) %>%
    arrange(p) %>%
    left_join(
        select(phenos, p1 = GBE_ID, name_1 = Name, n_1 = Cases),
        by = "p1"
    ) %>%
    left_join(
        select(phenos, p2 = GBE_ID, name_2 = Name, n_2 = Cases),
        by = "p2"
    ) %>%
    select(GBE_ID_1 = p1, name_1, GBE_ID_2 = p2, name_2, p, rg, se, z, n_1, n_2)

GBE_ID_1,name_1,GBE_ID_2,name_2,p,rg,se,z,n_1,n_2
<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<int>
HC188,Cholelithiasis/gall stones,HC225,Cholecystitis,0.0000e+00,0.9994,0.0111,89.9051,14938,12961
cancer1001,Lung cancer,cancer1084,Respiratory/intrathoracic cancer,0.0000e+00,0.9492,0.0249,38.0888,1981,2392
cancer1003,Skin cancer,cancer1060,Non-melanoma skin cancer,0.0000e+00,0.9913,0.0036,278.7900,19168,16789
BIN_FC1006152,DVT diagnosed by doctor,BIN_FC11006152,Blood clot or DVT diagnosed by doctor,0.0000e+00,0.9784,0.0124,78.6252,7166,9053
cancer1020,Large bowel cancer/colorectal cancer,cancer1022,Colon cancer/sigmoid cancer,2.3998e-297,1.0092,0.0274,36.8554,4382,3251
HC201,Ulcerative colitis,HC95,Inflammatory bowel disease,8.4017e-237,0.9348,0.0284,32.8593,3147,3783
HC308,Hypertrophic cardiomyopathy (HCM / HOCM),HC414,Cardiomyopathy,2.3183e-157,0.9122,0.0341,26.7265,1096,1309
HC132,Angina,HC326,Heart attack/myocardial infarction,9.4273e-105,0.8774,0.0404,21.7357,18471,12138
HC132,Angina,HC215,Hypertension,2.0677e-70,0.5618,0.0317,17.7398,18471,107391
HC215,Hypertension,HC221,Diabetes,1.3011e-62,0.4679,0.0280,16.7004,107391,20437
