In [1]:
require(tidyverse)
require(data.table)


Loading required package: tidyverse
── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.1.0       ✔ purrr   0.3.1  
✔ tibble  2.0.1       ✔ dplyr   0.8.0.1
✔ tidyr   0.8.3       ✔ stringr 1.4.0  
✔ readr   1.3.1       ✔ forcats 0.4.0  
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
Loading required package: data.table

Attaching package: ‘data.table’

The following objects are masked from ‘package:dplyr’:

    between, first, last

The following object is masked from ‘package:purrr’:

    transpose



In [2]:
covar_f <- '/oak/stanford/groups/mrivas/ukbb24983/sqc/ukb24983_GWAS_covar.phe'

geno_f <- 'ukb24983_cal_cALL_v2_hg19_ANGPTL7_protein-altering_vars.raw'

pheno_f <- list()
pheno_f[['Glaucoma']] <- '/oak/stanford/groups/mrivas/ukbb24983/phenotypedata/extras/highconfidenceqc/phe/HC276.phe'
pheno_f[['IOP']] <- '/oak/stanford/groups/mrivas/ukbb24983/phenotypedata/10136/21731/phe/INI5255.phe'


In [3]:
covar_df <- fread(
    cmd=paste0('cat ', covar_f, '| cut -f1-9'), sep='\t', data.table=F
)

geno_df <- fread(
    file=geno_f, sep='\t', data.table=F
)

pheno_df <- pheno_f %>% lapply(fread, sep='\t', head=F)
for(k in names(pheno_df)){
    colnames(pheno_df[[k]]) <- c('FID', 'IID', k)
}

In [22]:
joined_df <- pheno_df %>% 
reduce(inner_join, by=c('FID', 'IID')) %>%
inner_join(covar_df, by=c('FID', 'IID')) %>%
inner_join(geno_df, by=c('FID', 'IID')) %>%
select(-PAT, -MAT, -PHENOTYPE) %>%
left_join(
    data.frame(
        rs200058074_A = c(0, 1, 2),
        rs200058074 = c('G/G', 'A/G', 'A/A')   
    ), by='rs200058074_A'
) %>%
left_join(
    data.frame(
        rs28991002_G = c(0, 1, 2),
        rs28991002 = c('A/A', 'G/A', 'G/G')   
    ), by='rs28991002_G'
) %>%
left_join(
    data.frame(
        rs28991009_G = c(0, 1, 2),
        rs28991009 = c('T/T', 'G/T', 'G/G')   
    ), by='rs28991009_G'
) %>%
left_join(
    data.frame(
        rs143435072_C = c(0, 1, 2),
        rs143435072 = c('T/T', 'C/T', 'C/C')   
    ), by='rs143435072_C'
) %>%
mutate(
    IOP_measured = if_else(IOP == -9, FALSE, TRUE),
    Glaucoma = Glaucoma - 1
)


In [23]:
show_95CI <- function(estimate, standard_error){
    c(
        estimate,
        estimate - 1.96 * standard_error, 
        estimate + 1.96 * standard_error
    ) %>% print()
    c(
        estimate,
        estimate - 1.96 * standard_error, 
        estimate + 1.96 * standard_error
    ) %>% lapply(exp) %>% simplify() %>%  print()    
}

## 2x2 tables

In [34]:
joined_df %>% filter(IOP_measured == FALSE) %>% count(rs200058074, Glaucoma) %>% print()
joined_df %>% filter(IOP_measured == FALSE) %>% count(rs28991002, Glaucoma) %>% print()
joined_df %>% filter(IOP_measured == FALSE) %>% count(rs28991009, Glaucoma) %>% print()
joined_df %>% filter(IOP_measured == FALSE) %>% count(rs143435072, Glaucoma) %>% print()

“Factor `rs200058074` contains implicit NA, consider using `forcats::fct_explicit_na`”

# A tibble: 6 x 3
  rs200058074 Glaucoma      n
  <fct>          <dbl>  <int>
1 A/A                0 250929
2 A/A                1   4265
3 A/G                0    280
4 A/G                1      1
5 <NA>               0    146
6 <NA>               1      3


“Factor `rs28991002` contains implicit NA, consider using `forcats::fct_explicit_na`”

# A tibble: 7 x 3
  rs28991002 Glaucoma      n
  <fct>         <dbl>  <int>
1 A/A               0      1
2 G/A               0   1258
3 G/A               1     20
4 G/G               0 249949
5 G/G               1   4245
6 <NA>              0    147
7 <NA>              1      4


“Factor `rs28991009` contains implicit NA, consider using `forcats::fct_explicit_na`”

# A tibble: 8 x 3
  rs28991009 Glaucoma      n
  <fct>         <dbl>  <int>
1 G/G               0 246997
2 G/G               1   4214
3 G/T               0   4011
4 G/T               1     44
5 T/T               0     22
6 T/T               1      1
7 <NA>              0    325
8 <NA>              1     10


“Factor `rs143435072` contains implicit NA, consider using `forcats::fct_explicit_na`”

# A tibble: 6 x 3
  rs143435072 Glaucoma      n
  <fct>          <dbl>  <int>
1 C/C                0 251119
2 C/C                1   4266
3 C/T                0    210
4 C/T                1      2
5 <NA>               0     26
6 <NA>               1      1


## glm fit

In [30]:
glm_rs200058074 <- glm (
    Glaucoma ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + as.factor(rs200058074),
    joined_df %>% filter(IOP_measured == FALSE), family=binomial(link="logit")
)
glm_rs28991002 <- glm (
    Glaucoma ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + as.factor(rs28991002),
    joined_df %>% filter(IOP_measured == FALSE), family=binomial(link="logit")
)
glm_rs28991009 <- glm (
    Glaucoma ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + as.factor(rs28991009),
    joined_df %>% filter(IOP_measured == FALSE), family=binomial(link="logit")
)
glm_rs143435072 <- glm (
    Glaucoma ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + as.factor(rs143435072),
    joined_df %>% filter(IOP_measured == FALSE), family=binomial(link="logit")
)

glm_additive_rs28991009 <- glm (
    Glaucoma ~ age + as.factor(sex) + as.factor(Array) + PC1 + PC2 + PC3 + PC4 + rs28991009,
    joined_df %>% filter(IOP_measured == FALSE), family=binomial(link="logit")
)


In [31]:
glm_rs200058074 %>% summary() %>% print() 
glm_rs28991002  %>% summary() %>% print()
glm_rs28991009  %>% summary() %>% print()
glm_rs143435072 %>% summary() %>% print()
glm_additive_rs28991009  %>% summary() %>% print()


Call:
glm(formula = Glaucoma ~ age + as.factor(sex) + as.factor(Array) + 
    PC1 + PC2 + PC3 + PC4 + as.factor(rs200058074), family = binomial(link = "logit"), 
    data = joined_df %>% filter(IOP_measured == FALSE))

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.3536  -0.2186  -0.1688  -0.1163   3.5119  

Coefficients:
                            Estimate Std. Error z value Pr(>|z|)    
(Intercept)               -10.699826   0.218279 -49.019  < 2e-16 ***
age                         0.094537   0.002449  38.602  < 2e-16 ***
as.factor(sex)1             0.162465   0.031049   5.233 1.67e-07 ***
as.factor(Array)1          -0.010338   0.048960  -0.211    0.833    
PC1                        -0.009133   0.010126  -0.902    0.367    
PC2                        -0.003380   0.010491  -0.322    0.747    
PC3                        -0.001950   0.010071  -0.194    0.846    
PC4                        -0.022257   0.005550  -4.010 6.07e-05 ***
as.factor(rs200058074)A/G  -1.5

In [38]:
rbind(
    glm_rs200058074 %>% summary() %>% coef() %>% tail(1) %>% data.frame() %>% rownames_to_column('variant'),
    glm_rs28991002  %>% summary() %>% coef() %>% tail(1) %>% data.frame() %>% rownames_to_column('variant'),
    glm_rs28991009  %>% summary() %>% coef() %>% tail(2) %>% data.frame() %>% rownames_to_column('variant'),
    glm_rs143435072 %>% summary() %>% coef() %>% tail(1) %>% data.frame() %>% rownames_to_column('variant')
) %>%
data.frame() %>%
rename(
    'Std' = 'Std..Error'
) %>%
mutate(
    l95 = Estimate - 1.96 * Std,
    u95 = Estimate + 1.96 * Std,
    OR = log(Estimate),
    str = paste0(signif(Estimate, digits=2), ' [', signif(l95, digits=2), ', ', signif(u95, digits=2), ']'), 
)

“NaNs produced”

variant,Estimate,Std,z.value,Pr...z..,l95,u95,OR,str
as.factor(rs200058074)A/G,-1.5593015,0.9975723,-1.56309624,0.11802996,-3.5145431,0.3959402,,"-1.6 [-3.5, 0.4]"
as.factor(rs28991002)G/G,4.8093377,72.4628635,0.06636969,0.947083501,-137.2178747,146.8365502,1.5705594,"4.8 [-140, 150]"
as.factor(rs28991009)G/T,-0.450249,0.1527419,-2.94777608,0.003200688,-0.7496231,-0.1508748,,"-0.45 [-0.75, -0.15]"
as.factor(rs28991009)T/T,0.8336501,1.0300361,0.80934072,0.418319191,-1.1852206,2.8525208,-0.1819415,"0.83 [-1.2, 2.9]"
as.factor(rs143435072)C/T,-0.5399543,0.7119385,-0.75842829,0.448194625,-1.9353537,0.8554452,,"-0.54 [-1.9, 0.86]"


#### Note. BETA is computed for G/G above for rs28991002

In [43]:
joined_df %>% filter(IOP_measured == FALSE) %>% select(FID, IID) %>% 
fwrite(
    '/oak/stanford/groups/mrivas/users/ytanigaw/repos/rivas-lab/ANGPTL7/notebook/forest_plot/Glaucoma/cohorts.phe', 
    sep='\t', row.names=F, col.names=F
)