In [1]:
require(tidyverse)
require(data.table)


Loading required package: tidyverse
── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.1.0       ✔ purrr   0.3.1  
✔ tibble  2.0.1       ✔ dplyr   0.8.0.1
✔ tidyr   0.8.3       ✔ stringr 1.4.0  
✔ readr   1.3.1       ✔ forcats 0.4.0  
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
Loading required package: data.table

Attaching package: ‘data.table’

The following objects are masked from ‘package:dplyr’:

    between, first, last

The following object is masked from ‘package:purrr’:

    transpose



In [2]:
geno_f <- 'ukb24983_cal_cALL_v2_hg19_ANGPTL7_protein-altering_vars.raw'
geno_df <- fread(
    file=geno_f, sep='\t', data.table=F
)

In [3]:
IOP_pheno_f <- '/oak/stanford/groups/mrivas/ukbb24983/phenotypedata/10136/21731/phe/INI5255.phe'
IOP_pheno_df <- IOP_pheno_f %>% fread(sep='\t', head=F)
colnames(IOP_pheno_df) <- c('FID', 'IID', 'IOP')

In [4]:
covar_f <- '/oak/stanford/groups/mrivas/ukbb24983/sqc/ukb24983_GWAS_covar.phe'
covar_df <- fread(
    cmd=paste0('cat ', covar_f, '| cut -f1-9'), sep='\t', data.table=F
)

In [5]:
show_95CI <- function(estimate, standard_error){
    c(estimate - 1.96 * standard_error, estimate + 1.96 * standard_error) %>% print()
}

## rs28991009, Gln175His, 1:11253684:G:T

In [6]:
df_IOP_rs28991009 <- geno_df %>% select(IID, rs28991009_G) %>%
inner_join(IOP_pheno_df %>% select(IID, IOP), by='IID') %>%
inner_join(covar_df %>% select(-FID), by='IID') %>% 
filter(IOP != -9) %>% drop_na() %>%
left_join(
    data.frame(
        rs28991009_G = c(0, 1, 2),
        rs28991009 = c('T/T', 'G/T', 'G/G')   
    ),
    by='rs28991009_G'
) %>%
select(-rs28991009_G)

In [7]:
df_IOP_rs28991009 %>% count(rs28991009)

rs28991009,n
G/G,80028
G/T,1355
T/T,5


In [8]:
lm_rs28991009 <- lm (
    IOP ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + as.factor(rs28991009),
    df_IOP_rs28991009 
)


In [9]:
summary(lm_rs28991009)


Call:
lm(formula = IOP ~ age + as.factor(sex) + as.factor(Array) + 
    PC1 + PC2 + PC3 + PC4 + as.factor(rs28991009), data = df_IOP_rs28991009)

Residuals:
    Min      1Q  Median      3Q     Max 
-16.589  -2.629  -0.330   2.207  54.924 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)              13.176825   0.166714  79.039  < 2e-16 ***
age                       0.040123   0.001778  22.572  < 2e-16 ***
as.factor(sex)1           0.121370   0.027927   4.346 1.39e-05 ***
as.factor(Array)1        -0.063010   0.045223  -1.393   0.1635    
PC1                      -0.014744   0.009083  -1.623   0.1046    
PC2                       0.017346   0.009392   1.847   0.0648 .  
PC3                       0.007941   0.009000   0.882   0.3776    
PC4                       0.003841   0.005145   0.747   0.4553    
as.factor(rs28991009)G/T -0.732150   0.108803  -6.729 1.72e-11 ***
as.factor(rs28991009)T/T -2.951591   1.776088  -1.662   0.0965 .  
---
Sign

In [10]:
show_95CI(-0.732150, 0.108803)

[1] -0.9454039 -0.5188961


In [11]:
show_95CI(-2.951591, 1.776088)

[1] -6.4327235  0.5295415


## rs143435072, Arg177Ter, 1:11253688:C:T

In [12]:
df_IOP_rs143435072 <- geno_df %>% select(IID, rs143435072_C) %>%
inner_join(IOP_pheno_df %>% select(IID, IOP), by='IID') %>%
inner_join(covar_df %>% select(-FID), by='IID') %>% 
filter(IOP != -9) %>% drop_na() %>%
left_join(
    data.frame(
        rs143435072_C = c(0, 1, 2),
        rs143435072 = c('T/T', 'C/T', 'C/C')   
    ),
    by='rs143435072_C'
) %>%
select(-rs143435072_C)

In [13]:
df_IOP_rs143435072 %>% count(rs143435072)

rs143435072,n
C/C,81453
C/T,62


In [14]:
lm_rs143435072 <- lm (
    IOP ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + as.factor(rs143435072),
    df_IOP_rs143435072 
)


In [15]:
summary(lm_rs143435072)


Call:
lm(formula = IOP ~ age + as.factor(sex) + as.factor(Array) + 
    PC1 + PC2 + PC3 + PC4 + as.factor(rs143435072), data = df_IOP_rs143435072)

Residuals:
    Min      1Q  Median      3Q     Max 
-16.576  -2.629  -0.331   2.202  54.937 

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               13.167172   0.166573  79.048  < 2e-16 ***
age                        0.040201   0.001776  22.633  < 2e-16 ***
as.factor(sex)1            0.120998   0.027907   4.336 1.45e-05 ***
as.factor(Array)1         -0.064417   0.045208  -1.425   0.1542    
PC1                       -0.014126   0.009076  -1.556   0.1196    
PC2                        0.017798   0.009386   1.896   0.0579 .  
PC3                        0.008495   0.008994   0.944   0.3449    
PC4                        0.004117   0.005141   0.801   0.4233    
as.factor(rs143435072)C/T -0.944409   0.504579  -1.872   0.0613 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ 

In [16]:
show_95CI(-0.944409, 0.504579)

[1] -1.93338384  0.04456584


## rs28991002, Arg140His, 1:11252369:G:A


In [17]:
df_IOP_rs28991002 <- geno_df %>% select(IID, rs28991002_G) %>%
inner_join(IOP_pheno_df %>% select(IID, IOP), by='IID') %>%
inner_join(covar_df %>% select(-FID), by='IID') %>% 
filter(IOP != -9) %>% drop_na() %>%
left_join(
    data.frame(
        rs28991002_G = c(0, 1, 2),
        rs28991002 = c('A/A', 'G/A', 'G/G')   
    ),
    by='rs28991002_G'
) %>%
select(-rs28991002_G)

In [18]:
df_IOP_rs28991002 %>% count(rs28991002)

rs28991002,n
G/A,427
G/G,81055


In [19]:
lm_rs28991002 <- lm (
    IOP ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + as.factor(rs28991002),
    df_IOP_rs28991002 
)


In [20]:
summary(lm_rs28991002)


Call:
lm(formula = IOP ~ age + as.factor(sex) + as.factor(Array) + 
    PC1 + PC2 + PC3 + PC4 + as.factor(rs28991002), data = df_IOP_rs28991002)

Residuals:
    Min      1Q  Median      3Q     Max 
-16.577  -2.629  -0.331   2.202  54.934 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)              12.679623   0.254799  49.763  < 2e-16 ***
age                       0.040243   0.001777  22.652  < 2e-16 ***
as.factor(sex)1           0.121084   0.027911   4.338 1.44e-05 ***
as.factor(Array)1        -0.064080   0.045220  -1.417   0.1565    
PC1                      -0.014235   0.009077  -1.568   0.1168    
PC2                       0.018324   0.009388   1.952   0.0510 .  
PC3                       0.007724   0.008996   0.859   0.3906    
PC4                       0.004097   0.005143   0.797   0.4257    
as.factor(rs28991002)G/G  0.481583   0.192698   2.499   0.0125 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residua

#### Note. BETA is computed for G/G above

In [21]:
show_95CI(-0.481583, 0.192698)

[1] -0.8592711 -0.1038949


## rs200058074, Gln136Arg, 1:11252357:A:G



In [22]:
df_IOP_rs200058074 <- geno_df %>% select(IID, rs200058074_A) %>%
inner_join(IOP_pheno_df %>% select(IID, IOP), by='IID') %>%
inner_join(covar_df %>% select(-FID), by='IID') %>% 
filter(IOP != -9) %>% drop_na() %>%
left_join(
    data.frame(
        rs200058074_A = c(0, 1, 2),
        rs200058074 = c('G/G', 'A/G', 'A/A')   
    ),
    by='rs200058074_A'
) %>%
select(-rs200058074_A)

In [23]:
df_IOP_rs200058074 %>% count(rs200058074)

rs200058074,n
A/A,81403
A/G,80


In [24]:
lm_rs200058074 <- lm (
    IOP ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + as.factor(rs200058074),
    df_IOP_rs200058074
)


In [25]:
summary(lm_rs200058074)


Call:
lm(formula = IOP ~ age + as.factor(sex) + as.factor(Array) + 
    PC1 + PC2 + PC3 + PC4 + as.factor(rs200058074), data = df_IOP_rs200058074)

Residuals:
    Min      1Q  Median      3Q     Max 
-16.575  -2.629  -0.331   2.202  54.938 

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               13.168731   0.166622  79.034  < 2e-16 ***
age                        0.040192   0.001776  22.625  < 2e-16 ***
as.factor(sex)1            0.120881   0.027913   4.331 1.49e-05 ***
as.factor(Array)1         -0.063037   0.045233  -1.394   0.1634    
PC1                       -0.013960   0.009078  -1.538   0.1241    
PC2                        0.017675   0.009387   1.883   0.0597 .  
PC3                        0.008441   0.008998   0.938   0.3482    
PC4                        0.004158   0.005142   0.809   0.4187    
as.factor(rs200058074)A/G -0.207756   0.444267  -0.468   0.6400    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ 

In [26]:
show_95CI(-0.207756, 0.444267)

[1] -1.0785193  0.6630073


## for forest plot


In [34]:
df2_IOP_rs28991009 <- geno_df %>% select(IID, rs28991009_G) %>%
inner_join(IOP_pheno_df %>% select(IID, IOP), by='IID') %>%
inner_join(covar_df %>% select(-FID), by='IID') %>% 
filter(IOP != -9) %>% drop_na() %>%
# left_join(
#     data.frame(
#         rs28991009_G = c(0, 1, 2),
#         rs28991009 = c('T/T', 'G/T', 'G/G')   
#     ),
#     by='rs28991009_G'
# ) %>%
mutate(
    rs28991009 = 2 - rs28991009_G
) %>%
select(-rs28991009_G)

In [36]:
df2_IOP_rs28991009 %>% count(rs28991009)

rs28991009,n
0,80028
1,1355
2,5


In [37]:
lm_additive_rs28991009 <- lm (
    IOP ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + rs28991009,
    df2_IOP_rs28991009 
)


In [38]:
summary(lm_additive_rs28991009)


Call:
lm(formula = IOP ~ age + as.factor(sex) + as.factor(Array) + 
    PC1 + PC2 + PC3 + PC4 + rs28991009, data = df2_IOP_rs28991009)

Residuals:
    Min      1Q  Median      3Q     Max 
-16.589  -2.629  -0.329   2.207  54.924 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)       13.177090   0.166713  79.040  < 2e-16 ***
age                0.040124   0.001778  22.572  < 2e-16 ***
as.factor(sex)1    0.121260   0.027927   4.342 1.41e-05 ***
as.factor(Array)1 -0.063109   0.045223  -1.396   0.1629    
PC1               -0.014735   0.009083  -1.622   0.1048    
PC2                0.017348   0.009392   1.847   0.0647 .  
PC3                0.007960   0.009000   0.884   0.3765    
PC4                0.003860   0.005145   0.750   0.4531    
rs28991009        -0.743050   0.108010  -6.879 6.05e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.971 on 81379 degrees of freedom
Multiple R-squared:  0.00729