In [1]:
# Attach the packages used throughout the notebook.
# library() stops with an error if a package is missing, whereas require()
# only warns and returns FALSE, letting later cells fail in confusing ways.
library(tidyverse)
library(data.table)


Loading required package: tidyverse
── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.1.0       ✔ purrr   0.3.1  
✔ tibble  2.0.1       ✔ dplyr   0.8.0.1
✔ tidyr   0.8.3       ✔ stringr 1.4.0  
✔ readr   1.3.1       ✔ forcats 0.4.0  
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
Loading required package: data.table

Attaching package: ‘data.table’

The following objects are masked from ‘package:dplyr’:

    between, first, last

The following object is masked from ‘package:purrr’:

    transpose



In [2]:
# Read the tab-separated genotype table as a plain data.frame
# (data.table=FALSE) so that dplyr verbs behave consistently downstream.
geno_f <- 'ukb24983_cal_cALL_v2_hg19_ANGPTL7_protein-altering_vars.raw'
geno_df <- fread(
    file=geno_f, sep='\t', data.table=FALSE
)

In [14]:
# Read the header-less, tab-separated phenotype file (FID, IID, status).
pheno_f <- '/oak/stanford/groups/mrivas/ukbb24983/phenotypedata/extras/highconfidenceqc/phe/HC276.phe'
pheno_df <- fread(file=pheno_f, sep='\t', header=FALSE, data.table=FALSE)
colnames(pheno_df) <- c('FID', 'IID', 'Glaucoma')
# Downstream cells treat -9 as the missing-phenotype code. Convert it to NA
# *before* shifting the coding by one; otherwise -9 would become -10 and slip
# past the later `filter(Glaucoma != -9)` / `drop_na()` steps.
pheno_df <- pheno_df %>%
    mutate(Glaucoma = na_if(Glaucoma, -9) - 1)

In [4]:
# Read only the first nine columns of the covariate table.
# `select=1:9` replaces the former `cat ... | cut -f1-9` shell pipeline, which
# pasted an unquoted path into a shell command and required cat/cut.
covar_f <- '/oak/stanford/groups/mrivas/ukbb24983/sqc/ukb24983_GWAS_covar.phe'
covar_df <- fread(
    file=covar_f, sep='\t', select=1:9, data.table=FALSE
)

In [36]:
#' Print an estimate with its confidence interval, raw and exponentiated.
#'
#' Prints two lines: first `c(estimate, lower, upper)` on the scale the
#' estimate was supplied on, then the same three numbers passed through
#' `exp()` (e.g. log-odds -> odds ratio).
#'
#' @param estimate Point estimate (e.g. a logistic-regression coefficient).
#' @param standard_error Standard error of `estimate`.
#' @param z Multiplier applied to the standard error; the default 1.96
#'   gives the usual 95% interval.
#' @return Invisibly, the exponentiated `c(estimate, lower, upper)` vector.
show_95CI <- function(estimate, standard_error, z = 1.96){
    # Build the interval once and reuse it for both printed lines.
    half_width <- z * standard_error
    ci <- c(
        estimate,
        estimate - half_width,
        estimate + half_width
    )
    print(ci)
    # exp() is vectorised, so no lapply()/simplify() round trip is needed.
    ci_exp <- exp(ci)
    print(ci_exp)
    invisible(ci_exp)
}

## rs28991009, Gln175His, 1:11253684:G:T

In [15]:
# Assemble the analysis table for rs28991009: genotype dosage joined to
# glaucoma status and covariates by IID, keeping complete rows only.
df_rs28991009 <- geno_df %>%
    select(IID, rs28991009_G) %>%
    inner_join(select(pheno_df, IID, Glaucoma), by='IID') %>%
    inner_join(select(covar_df, -FID), by='IID') %>%
    filter(Glaucoma != -9) %>%
    drop_na() %>%
    # Translate the 0/1/2 dosage column into a genotype label.
    left_join(
        data.frame(
            rs28991009_G = c(0, 1, 2),
            rs28991009 = c('T/T', 'G/T', 'G/G')
        ),
        by='rs28991009_G'
    ) %>%
    select(-rs28991009_G)

In [16]:
# Cross-tabulate genotype against glaucoma status.
count(df_rs28991009, rs28991009, Glaucoma)

rs28991009,Glaucoma,n
G/G,0,325445
G/G,1,5794
G/T,0,5353
G/T,1,57
T/T,0,27
T/T,1,1


In [20]:
# Logistic regression of glaucoma status on rs28991009 genotype,
# adjusted for age, sex, array and the first four PCs.
glm_rs28991009 <- glm(
    formula = Glaucoma ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + as.factor(rs28991009),
    family = binomial(link="logit"),
    data = df_rs28991009
)


In [22]:
# Coefficient table for the rs28991009 model.
glm_rs28991009 %>% summary()


Call:
glm(formula = Glaucoma ~ age + as.factor(sex) + as.factor(Array) + 
    PC1 + PC2 + PC3 + PC4 + as.factor(rs28991009), family = binomial(link = "logit"), 
    data = df_rs28991009)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.3815  -0.2224  -0.1733  -0.1204   3.4853  

Coefficients:
                           Estimate Std. Error z value Pr(>|z|)    
(Intercept)              -10.540355   0.186726 -56.448  < 2e-16 ***
age                        0.092780   0.002102  44.141  < 2e-16 ***
as.factor(sex)1            0.200978   0.026543   7.572 3.68e-14 ***
as.factor(Array)1          0.007181   0.042163   0.170 0.864754    
PC1                       -0.006324   0.008658  -0.730 0.465171    
PC2                       -0.001110   0.008953  -0.124 0.901321    
PC3                       -0.005598   0.008591  -0.652 0.514638    
PC4                       -0.015498   0.004766  -3.252 0.001148 ** 
as.factor(rs28991009)G/T  -0.518943   0.134115  -3.869 0.000109 ***
as.

In [37]:
# G/T coefficient and standard error, copied from the summary table above.
show_95CI(estimate = -0.518943, standard_error = 0.134115)

[1] -0.5189430 -0.7818084 -0.2560776
[1] 0.5951493 0.4575778 0.7740819


In [38]:
# NOTE(review): presumably the T/T coefficient and standard error; that row is
# cut off in the saved summary output, so confirm against a fresh run.
show_95CI(estimate = 0.605530, standard_error = 1.024912)

[1]  0.605530 -1.403298  2.614358
[1]  1.8322230  0.2457851 13.6584383


## rs143435072, Arg177Ter, 1:11253688:C:T

In [40]:
# Assemble the analysis table for rs143435072: genotype dosage joined to
# glaucoma status and covariates by IID, keeping complete rows only.
df_rs143435072 <- geno_df %>%
    select(IID, rs143435072_C) %>%
    inner_join(select(pheno_df, IID, Glaucoma), by='IID') %>%
    inner_join(select(covar_df, -FID), by='IID') %>%
    filter(Glaucoma != -9) %>%
    drop_na() %>%
    # Translate the 0/1/2 dosage column into a genotype label.
    left_join(
        data.frame(
            rs143435072_C = c(0, 1, 2),
            rs143435072 = c('T/T', 'C/T', 'C/C')
        ),
        by='rs143435072_C'
    ) %>%
    select(-rs143435072_C)

In [42]:
# Cross-tabulate genotype against glaucoma status.
count(df_rs143435072, rs143435072, Glaucoma)

rs143435072,Glaucoma,n
C/C,0,330978
C/C,1,5860
C/T,0,272
C/T,1,2


In [43]:
# Logistic regression of glaucoma status on rs143435072 genotype,
# adjusted for age, sex, array and the first four PCs.
glm_rs143435072 <- glm(
    formula = Glaucoma ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + as.factor(rs143435072),
    family = binomial(link="logit"),
    data = df_rs143435072
)


In [44]:
# Coefficient table for the rs143435072 model.
glm_rs143435072 %>% summary()


Call:
glm(formula = Glaucoma ~ age + as.factor(sex) + as.factor(Array) + 
    PC1 + PC2 + PC3 + PC4 + as.factor(rs143435072), family = binomial(link = "logit"), 
    data = df_rs143435072)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.3542  -0.2224  -0.1737  -0.1206   3.4869  

Coefficients:
                            Estimate Std. Error z value Pr(>|z|)    
(Intercept)               -10.545403   0.186547 -56.530  < 2e-16 ***
age                         0.092778   0.002100  44.177  < 2e-16 ***
as.factor(sex)1             0.203656   0.026521   7.679  1.6e-14 ***
as.factor(Array)1           0.005215   0.042102   0.124  0.90142    
PC1                        -0.006405   0.008650  -0.741  0.45897    
PC2                        -0.001294   0.008945  -0.145  0.88503    
PC3                        -0.005168   0.008584  -0.602  0.54711    
PC4                        -0.015260   0.004763  -3.204  0.00136 ** 
as.factor(rs143435072)C/T  -0.869546   0.710682  -1.224  0.2

In [45]:
# C/T coefficient and standard error, copied from the summary table above.
show_95CI(estimate = -0.869546, standard_error = 0.710682)

[1] -0.8695460 -2.2624827  0.5233907
[1] 0.4191418 0.1040917 1.6877406


## rs28991002, Arg140His, 1:11252369:G:A


In [48]:
# Assemble the analysis table for rs28991002: genotype dosage joined to
# glaucoma status and covariates by IID, keeping complete rows only.
df_rs28991002 <- geno_df %>%
    select(IID, rs28991002_G) %>%
    inner_join(select(pheno_df, IID, Glaucoma), by='IID') %>%
    inner_join(select(covar_df, -FID), by='IID') %>%
    filter(Glaucoma != -9) %>%
    drop_na() %>%
    # Translate the 0/1/2 dosage column into a genotype label.
    left_join(
        data.frame(
            rs28991002_G = c(0, 1, 2),
            rs28991002 = c('A/A', 'G/A', 'G/G')
        ),
        by='rs28991002_G'
    ) %>%
    select(-rs28991002_G)

In [50]:
# Cross-tabulate genotype against glaucoma status.
count(df_rs28991002, rs28991002, Glaucoma)

rs28991002,Glaucoma,n
A/A,0,1
G/A,0,1678
G/A,1,27
G/G,0,329417
G/G,1,5832


In [56]:
# Logistic regression of glaucoma status on rs28991002 genotype,
# adjusted for age, sex, array and the first four PCs (all genotypes kept).
glm_rs28991002 <- glm(
    formula = Glaucoma ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + as.factor(rs28991002),
    family = binomial(link="logit"),
    data = df_rs28991002
)


In [57]:
# Coefficient table for the rs28991002 model fitted on all genotypes.
glm_rs28991002 %>% summary()


Call:
glm(formula = Glaucoma ~ age + as.factor(sex) + as.factor(Array) + 
    PC1 + PC2 + PC3 + PC4 + as.factor(rs28991002), family = binomial(link = "logit"), 
    data = df_rs28991002)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.3542  -0.2224  -0.1737  -0.1206   3.4873  

Coefficients:
                           Estimate Std. Error z value Pr(>|z|)    
(Intercept)              -15.397368  72.463075  -0.212  0.83173    
age                        0.092834   0.002101  44.180  < 2e-16 ***
as.factor(sex)1            0.202780   0.026528   7.644 2.11e-14 ***
as.factor(Array)1          0.005623   0.042132   0.133  0.89384    
PC1                       -0.006400   0.008651  -0.740  0.45946    
PC2                       -0.001066   0.008948  -0.119  0.90518    
PC3                       -0.005198   0.008586  -0.605  0.54489    
PC4                       -0.015370   0.004765  -3.226  0.00126 ** 
as.factor(rs28991002)G/A   4.732850  72.463121   0.065  0.94792    
as.

In [58]:
# Refit the rs28991002 model with the A/A genotype rows excluded.
# This overwrites the previous glm_rs28991002 object.
glm_rs28991002 <- glm(
    formula = Glaucoma ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + as.factor(rs28991002),
    family = binomial(link="logit"),
    data = df_rs28991002 %>% filter(rs28991002 != 'A/A')
)


In [59]:
# Coefficient table for the rs28991002 model refitted without A/A.
glm_rs28991002 %>% summary()


Call:
glm(formula = Glaucoma ~ age + as.factor(sex) + as.factor(Array) + 
    PC1 + PC2 + PC3 + PC4 + as.factor(rs28991002), family = binomial(link = "logit"), 
    data = df_rs28991002 %>% filter(rs28991002 != "A/A"))

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.3542  -0.2224  -0.1737  -0.1206   3.4873  

Coefficients:
                           Estimate Std. Error z value Pr(>|z|)    
(Intercept)              -10.664518   0.269489 -39.573  < 2e-16 ***
age                        0.092834   0.002101  44.183  < 2e-16 ***
as.factor(sex)1            0.202780   0.026528   7.644  2.1e-14 ***
as.factor(Array)1          0.005623   0.042131   0.133  0.89383    
PC1                       -0.006400   0.008651  -0.740  0.45945    
PC2                       -0.001066   0.008947  -0.119  0.90518    
PC3                       -0.005198   0.008586  -0.605  0.54488    
PC4                       -0.015370   0.004765  -3.226  0.00126 ** 
as.factor(rs28991002)G/G   0.114500   0

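#### Note: with A/A excluded, G/A is the reference level, so the BETA above is for G/G relative to G/A. The sign is flipped below to report the effect of G/A relative to G/G.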

In [60]:
# Negated G/G coefficient (0.114500 in the summary above), so the interval
# describes G/A relative to G/G.
# NOTE(review): the standard error is presumably from the same row, which is
# cut off in the saved output; confirm against a fresh run.
show_95CI(estimate = -0.114500, standard_error = 0.195118)

[1] -0.1145000 -0.4969313  0.2679313
[1] 0.8918119 0.6083948 1.3072573


## rs200058074, Gln136Arg, 1:11252357:A:G



In [61]:
# Assemble the analysis table for rs200058074: genotype dosage joined to
# glaucoma status and covariates by IID, keeping complete rows only.
df_rs200058074 <- geno_df %>%
    select(IID, rs200058074_A) %>%
    inner_join(select(pheno_df, IID, Glaucoma), by='IID') %>%
    inner_join(select(covar_df, -FID), by='IID') %>%
    filter(Glaucoma != -9) %>%
    drop_na() %>%
    # Translate the 0/1/2 dosage column into a genotype label.
    left_join(
        data.frame(
            rs200058074_A = c(0, 1, 2),
            rs200058074 = c('G/G', 'A/G', 'A/A')
        ),
        by='rs200058074_A'
    ) %>%
    select(-rs200058074_A)

In [62]:
# Cross-tabulate genotype against glaucoma status.
count(df_rs200058074, rs200058074, Glaucoma)

rs200058074,Glaucoma,n
A/A,0,330740
A/A,1,5857
A/G,0,358
A/G,1,3


In [64]:
# Logistic regression of glaucoma status on rs200058074 genotype,
# adjusted for age, sex, array and the first four PCs.
glm_rs200058074 <- glm(
    formula = Glaucoma ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + as.factor(rs200058074),
    family = binomial(link="logit"),
    data = df_rs200058074
)


In [65]:
# Coefficient table for the rs200058074 model.
glm_rs200058074 %>% summary()


Call:
glm(formula = Glaucoma ~ age + as.factor(sex) + as.factor(Array) + 
    PC1 + PC2 + PC3 + PC4 + as.factor(rs200058074), family = binomial(link = "logit"), 
    data = df_rs200058074)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.3545  -0.2225  -0.1737  -0.1205   3.4871  

Coefficients:
                            Estimate Std. Error z value Pr(>|z|)    
(Intercept)               -10.549545   0.186619 -56.530  < 2e-16 ***
age                         0.092871   0.002101  44.199  < 2e-16 ***
as.factor(sex)1             0.203438   0.026526   7.669 1.73e-14 ***
as.factor(Array)1           0.004215   0.042104   0.100  0.92026    
PC1                        -0.006520   0.008652  -0.754  0.45107    
PC2                        -0.001929   0.008947  -0.216  0.82930    
PC3                        -0.004996   0.008586  -0.582  0.56068    
PC4                        -0.015248   0.004764  -3.201  0.00137 ** 
as.factor(rs200058074)A/G  -0.739735   0.581039  -1.273  0.2

In [66]:
# A/G coefficient and standard error, copied from the summary table above.
show_95CI(estimate = -0.739735, standard_error = 0.581039)

[1] -0.7397350 -1.8785714  0.3991014
[1] 0.4772404 0.1528082 1.4904848


In [71]:
# Number of rows and columns in the genotype table.
dim(geno_df)

In [73]:
# Peek at the first rows of the genotype table.
head(geno_df)

FID,IID,PAT,MAT,SEX,PHENOTYPE,rs200058074_A,rs28991002_G,rs28991009_G,rs143435072_C
1000028,1000028,0,0,2,-9,2,2,2,2
1000034,1000034,0,0,1,-9,2,2,2,2
1000045,1000045,0,0,2,-9,2,2,2,2
1000052,1000052,0,0,2,-9,2,2,2,2
1000076,1000076,0,0,1,-9,2,2,2,2
1000087,1000087,0,0,2,-9,2,2,1,2


In [74]:
# Dosage distribution for rs28991009 (missing calls get their own row).
count(geno_df, rs28991009_G)

rs28991009_G,n
0.0,28
1.0,5410
2.0,331239
,474


In [78]:
# Percent of individuals with a non-missing rs28991009 call who are
# heterozygous (G/T). The table has 337151 rows in total
# (28 + 5410 + 331239 + 474); the previous 377151 was a typo.
100 * 5410 / (337151 - 474)

In [81]:
# Percent of individuals with a non-missing rs28991009 call who are
# homozygous T/T. The table has 337151 rows in total
# (28 + 5410 + 331239 + 474); the previous 377151 was a typo.
100 * 28 / (337151 - 474)

In [80]:
# Dosage distribution for rs143435072 (missing calls get their own row).
count(geno_df, rs143435072_C)

rs143435072_C,n
1.0,274
2.0,336838
,39


In [82]:
# Percent of individuals with a non-missing rs143435072 call who are
# heterozygous (C/T). The table has 337151 rows in total
# (274 + 336838 + 39); the previous 377151 was a typo.
100 * 274 / (337151 - 39)

In [76]:
# Dosage distribution for rs200058074 (missing calls get their own row).
count(geno_df, rs200058074_A)

rs200058074_A,n
1.0,361
2.0,336597
,193


In [79]:
# Percent of individuals with a non-missing rs200058074 call who are
# heterozygous (A/G): 361 carriers out of 337151 rows minus 193 missing calls.
100 * 361 / (337151 - 193)

In [83]:
# Dosage distribution for rs28991002 (missing calls get their own row).
count(geno_df, rs28991002_G)

rs28991002_G,n
0.0,1
1.0,1705
2.0,335249
,196


In [84]:
# Percent of individuals with a non-missing rs28991002 call who are
# heterozygous (G/A): 1705 carriers out of 337151 rows minus 196 missing calls.
100 * 1705 / (337151 - 196)

In [86]:
# Dosage distribution for rs200058074 again (same tabulation as the earlier cell).
count(geno_df, rs200058074_A)

rs200058074_A,n
1.0,361
2.0,336597
,193


In [88]:
# Percent of individuals with a non-missing rs200058074 call who are
# heterozygous (A/G): 361 carriers out of 337151 rows minus 193 missing calls.
100 * 361 / (337151 - 193)