In [1]:
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(data.table))


In [2]:
# Names of the covariate columns pulled from the covariate table:
# age, sex, Array, and PC1 through PC4.
covars <- c('age', 'sex', 'Array', sprintf('PC%d', 1:4))

In [3]:
# Load the tab-separated covariate table, keeping only the sample identifier
# columns (FID, IID) and the columns named in `covars`.
# `data.table=FALSE` makes fread return a plain data.frame.
covar_f <- '/oak/stanford/groups/mrivas/ukbb24983/sqc/ukb24983_GWAS_covar.phe'
covar_df <- fread(
    covar_f, sep='\t', data.table=FALSE,
    select=c('FID', 'IID', covars)
)

In [4]:
# Load the tab-separated phenotype table; the first line is a header row.
# The `data.table` argument is not overridden here, so fread's default
# return type is kept.
pheno_f <- '/oak/stanford/groups/mrivas/projects/ANGPTL7/ukbb_GCTA/IOP_glaucoma.phe'
pheno_df <- fread(pheno_f, sep='\t', header=TRUE)

In [5]:
# Per-variant lookup tables that translate a count (0, 1 or 2) into a
# genotype label. The list is keyed by rsid. In every table the first
# column is named '<rsid>_<allele>' and holds the count; the second column
# is named by the rsid alone and holds the genotype string.
geno_annot_dfs <- Map(
    function(rsid, allele, genotypes){
        setNames(
            data.frame(c(0, 1, 2), genotypes),
            c(paste0(rsid, '_', allele), rsid)
        )
    },
    # rsids (also used by Map as the names of the resulting list)
    c('rs200058074', 'rs28991002', 'rs28991009', 'rs143435072'),
    # allele suffix of the count column for each rsid
    c('A', 'G', 'G', 'C'),
    # genotype labels for counts 0, 1 and 2, in that order
    list(
        c('G/G', 'A/G', 'A/A'),
        c('A/A', 'G/A', 'G/G'),
        c('T/T', 'G/T', 'G/G'),
        c('T/T', 'C/T', 'C/C')
    )
)


In [6]:
# Load the tab-separated genotype table as a plain data.frame, then attach
# one genotype-label column per variant.
# The joins are derived from `geno_annot_dfs` instead of being spelled out
# one by one: each lookup table is left-joined on its first column (the
# '<rsid>_<allele>' count column), in the order the tables appear in the list.
geno_f <- 'ukb24983_cal_cALL_v2_hg19_ANGPTL7_protein-altering_vars.raw'
geno_df <- Reduce(
    function(joined, annot_df){
        left_join(joined, annot_df, by=names(annot_df)[1])
    },
    geno_annot_dfs,
    fread(file=geno_f, sep='\t', data.table=FALSE)
)


In [7]:
# Assemble the analysis table: sample ID plus the genotype-label columns,
# inner-joined on IID with the phenotype and covariate tables (FID is dropped
# from both so that IID is the only shared column).
# `hasIOP` is TRUE when at least one of the six INI* columns is non-missing.
master_df <- geno_df %>%
    select(IID, names(geno_annot_dfs)) %>%
    inner_join(select(pheno_df, -FID), by='IID') %>%
    inner_join(select(covar_df, -FID), by='IID') %>%
    mutate(
        hasIOP = !is.na(INI2005254) | !is.na(INI2005255) |
            !is.na(INI5254) | !is.na(INI5255) |
            !is.na(INI5262) | !is.na(INI5263)
    )


In [8]:
# Build the model formula for one (variant, phenotype) pair.
#   x: name of the genotype column; enters the model as a factor
#   y: name of the response column
# The remaining terms are fixed: sex and Array as factors, plus PC1..PC4.
# NOTE(review): `age` is listed in `covars` but is not a term of this
# formula -- confirm that leaving it out of the model is intended.
compose_formula <- function(x, y){
    rhs_terms <- c(
        'as.factor(sex)', 'as.factor(Array)',
        'PC1', 'PC2', 'PC3', 'PC4',
        paste0('as.factor(', x, ')')
    )
    stats::formula(paste0(y, ' ~ ', paste(rhs_terms, collapse=' + ')))
}


In [9]:
# Turn the coefficient table of a fitted model into a tidy data.frame.
#
#   lmfit:       a fitted model whose summary() has a 4-column
#                `coefficients` matrix (e.g. lm or glm)
#   len_covars:  number of leading coefficient rows to drop; the intercept
#                row counts as one of them. If it is at least the number of
#                rows, a zero-row data.frame is returned.
#   is_binomial: if TRUE, the estimate and interval are exponentiated and
#                reported in `OR_str`; otherwise they are reported as-is in
#                `BETA_str`
#
# Returns a data.frame with columns ID, BETA, SE, t.value, P and one of
# BETA_str / OR_str formatted as 'estimate [lower, upper]', where the bounds
# are BETA -/+ 1.96 * SE and every number is rounded to 2 significant digits.
# `ID` is the coefficient name with 'as.factor' and parentheses removed.
# `t.value` is simply the third column of the coefficient matrix; for a
# binomial glm that column is a z statistic.
format_lmfit <- function(lmfit, len_covars = 0, is_binomial=FALSE){
    coef_mat <- summary(lmfit)$coefficients

    lmfitdf <- data.frame(
        ID      = gsub('as.factor|[()]', '', rownames(coef_mat)),
        BETA    = unname(coef_mat[, 1]),
        SE      = unname(coef_mat[, 2]),
        t.value = unname(coef_mat[, 3]),
        P       = unname(coef_mat[, 4]),
        stringsAsFactors = FALSE
    )

    # Drop the first `len_covars` rows. A logical mask is used rather than
    # tail(): tail() with a negative count returns all but the first |n|
    # rows, which would silently yield the wrong rows when `len_covars`
    # exceeds the number of coefficients.
    lmfitdf_tail <- lmfitdf[seq_len(nrow(lmfitdf)) > len_covars, , drop = FALSE]

    # Odds-ratio scale for binomial models, untouched scale otherwise.
    to_scale <- if (is_binomial) exp else identity
    str_col  <- if (is_binomial) 'OR_str' else 'BETA_str'

    half_width <- 1.96 * lmfitdf_tail$SE
    point <- signif(to_scale(lmfitdf_tail$BETA), digits=2)
    lower <- signif(to_scale(lmfitdf_tail$BETA - half_width), digits=2)
    upper <- signif(to_scale(lmfitdf_tail$BETA + half_width), digits=2)

    # paste0() recycles zero-length input to one empty string, so the
    # zero-row case is handled explicitly.
    lmfitdf_tail[[str_col]] <- if (nrow(lmfitdf_tail) > 0) {
        paste0(point, ' [', lower, ', ', upper, ']')
    } else {
        character(0)
    }

    lmfitdf_tail
}


In [10]:
# Linear regression of every IOP phenotype on every variant.
# For each (phenotype y, variant x) pair a model is fit on the samples with
# hasIOP set, and only the coefficient rows of the variant's genotype
# factor are kept. The result has one row per non-reference genotype with
# columns GBE_ID, variant, BETA_str, P, BETA, SE, t.value.

# The subset does not depend on x or y, so compute it once.
iop_df <- master_df %>% filter(hasIOP)

IOP_betas <- bind_rows(lapply(
    c('INI2005254', 'INI2005255', 'INI5254', 'INI5255', 'INI5262', 'INI5263'),
    function(y){
        bind_rows(lapply(
            names(geno_annot_dfs),
            function(x){
                lm(compose_formula(x, y), iop_df) %>%
                format_lmfit() %>%
                # Pick the genotype rows by name (cleaned IDs look like
                # '<rsid><genotype>') instead of skipping a fixed number of
                # leading rows: length(covars) only equals the number of
                # non-genotype coefficient rows when sex and Array each
                # contribute exactly one row.
                filter(startsWith(ID, x)) %>%
                mutate(GBE_ID = y) %>%
                rename(variant = ID) %>%
                select(GBE_ID, variant, BETA_str, P, BETA, SE, t.value)
            }
        ))
    }
))


In [11]:
# Save the IOP regression results as a tab-separated file.
fwrite(IOP_betas, file='regression_IOP.tsv', sep='\t')


In [14]:
# Logistic regression of HC276 on every variant.
# For each variant x a binomial (logit) glm is fit and only the coefficient
# rows of the variant's genotype factor are kept. The result has one row
# per non-reference genotype with columns GBE_ID, variant, OR_str, P, BETA,
# SE, t.value (for a binomial glm the `t.value` column is a z statistic).

# NOTE(review): the models are fit on samples with hasIOP == FALSE, i.e.
# those with none of the six IOP columns present -- confirm this
# restriction is intended.
# The subset does not depend on x or y, so compute it once.
no_iop_df <- master_df %>% filter(! hasIOP)

glaucoma_OR <- bind_rows(lapply(
    c('HC276'),
    function(y){
        bind_rows(lapply(
            names(geno_annot_dfs),
            function(x){
                glm(compose_formula(x, y), no_iop_df, family=binomial(link="logit")) %>%
                format_lmfit(is_binomial = TRUE) %>%
                # Pick the genotype rows by name (cleaned IDs look like
                # '<rsid><genotype>') instead of skipping a fixed number of
                # leading rows: length(covars) only equals the number of
                # non-genotype coefficient rows when sex and Array each
                # contribute exactly one row.
                filter(startsWith(ID, x)) %>%
                mutate(GBE_ID = y) %>%
                rename(variant = ID) %>%
                select(GBE_ID, variant, OR_str, P, BETA, SE, t.value)
            }
        ))
    }
))


In [15]:
# Save the glaucoma regression results as a tab-separated file.
fwrite(glaucoma_OR, file='regression_Glaucoma.tsv', sep='\t')
