In [1]:
suppressWarnings(suppressPackageStartupMessages({
    library(tidyverse)
    library(data.table)
}))


In [38]:
read_BETAs <- function(beta_f){
    # Load the table of coefficients stored in `beta_f`.
    #
    # Args:
    #   beta_f: input handed to data.table::fread() as its first argument.
    #
    # Returns:
    #   The table read by fread(), with its header untouched (in particular
    #   a '#CHROM' column, if present, is NOT renamed to 'CHROM').
    betas <- fread(beta_f)
    betas
}


In [4]:
read_covar_BETAs <- function(covar_beta_f){
    # Load the covariate coefficients and index them by covariate name.
    #
    # Args:
    #   covar_beta_f: input handed to data.table::fread(); the table must
    #                 contain an 'ID' column.
    #
    # Returns:
    #   A data frame whose row names are the values of the 'ID' column
    #   (the column itself is removed from the data).
    covar_tbl <- fread(covar_beta_f)
    column_to_rownames(covar_tbl, var = 'ID')
}


In [5]:
read_PRS <- function(sscore_f){
    # Read the per-individual score from a zstd-compressed score file.
    #
    # Args:
    #   sscore_f: path to the compressed file (presumably a PLINK 2
    #             `.sscore.zst` -- confirm). It is decompressed on the fly
    #             with the external `zstdcat` command.
    #
    # Returns:
    #   A table with columns FID and IID (both character) and geno_score
    #   (taken from the SCORE1_SUM column).

    # Quote the path before it reaches the shell so that spaces and shell
    # metacharacters in it are treated literally. path.expand() resolves a
    # leading '~' first, because the shell no longer expands it once quoted.
    zstd_cmd <- paste('zstdcat', shQuote(path.expand(sscore_f)))

    fread(
        cmd=zstd_cmd,
        select=c('#FID', 'IID', 'SCORE1_SUM'),
        colClasses=c('#FID'='character', 'IID'='character')
    ) %>%
    rename('FID'='#FID', 'geno_score'='SCORE1_SUM')
}


In [6]:
perform_eval <- function(response, pred, metric.type){
    # Score a predictor against a response.
    #
    # Args:
    #   response:    numeric vector of observed values.
    #   pred:        numeric vector of predictions, same length as `response`.
    #   metric.type: 'r2' selects R-squared; any other value selects AUC.
    #
    # Returns:
    #   A single number: the R-squared of the linear fit response ~ 1 + pred,
    #   or the area under the ROC curve computed by ROCR.
    if(metric.type != 'r2'){
        # The labels handed to ROCR are `response - 1` turned into a factor
        # (presumably the response is coded 1/2 -- confirm with the caller).
        labels   <- factor(response - 1)
        roc_pred <- ROCR::prediction(pred, labels)
        roc_perf <- ROCR::performance(roc_pred, measure = 'auc')
        return(roc_perf@y.values[[1]])
    }
    fit <- lm(response ~ 1 + pred)
    summary(fit)$r.squared
}


In [7]:
compute_covar_scores <- function(phe_df, covar_BETAs){
    # Compute the covariate-only score of every individual.
    #
    # Args:
    #   phe_df:      data frame with one row per individual, row names of the
    #                form '<FID>_<IID>', and one column per covariate.
    #   covar_BETAs: data frame whose row names are covariate names (they must
    #                all be columns of `phe_df`) and which has a single
    #                coefficient column named 'BETA'.
    #
    # Returns:
    #   A data frame with columns FID, IID and covar_score, where covar_score
    #   is the matrix product of the covariate values and the coefficients.

    # Row names of the coefficient table give both the covariates to use and
    # their order, so the columns of the matrix line up with the coefficients.
    covar_names <- rownames(covar_BETAs)
    covar_mat <- as.matrix(phe_df %>% select(all_of(covar_names)))

    (covar_mat %*% as.matrix(covar_BETAs)) %>%
    as.data.frame() %>%
    rownames_to_column('ID') %>%
    # Split only on '_': the default separator of separate() matches any
    # non-alphanumeric character and would break IDs containing '-' or '.'.
    separate(ID, c('FID', 'IID'), sep = '_') %>%
    rename('covar_score'='BETA')
}


In [24]:
build_eval_df <- function(score_test_df, metrc.type){
    # Evaluate the three scores of one data set against the phenotype.
    #
    # Args:
    #   score_test_df: data frame with columns phe, geno_score, covar_score
    #                  and geno_covar_score.
    #   metrc.type:    metric name forwarded to perform_eval(). The spelling
    #                  of the parameter is kept as is so that existing calls
    #                  passing it by name keep working.
    #
    # Returns:
    #   A one-row data frame with columns geno, covar and geno_covar holding
    #   the metric of the corresponding score.

    # Use the argument itself: the body previously read `metric.type`, a name
    # that is not a parameter, and therefore depended on a global variable.
    evaluate <- function(score){
        perform_eval(score_test_df$phe, score, metrc.type)
    }

    data.frame(
        geno       = evaluate(score_test_df$geno_score),
        covar      = evaluate(score_test_df$covar_score),
        geno_covar = evaluate(score_test_df$geno_covar_score),
        stringsAsFactors = FALSE
    )
}


In [35]:
eval_main <- function(phe, phe_f, PRS_f, BETAs_f, covar_BETAs_f){
    # Evaluate the genetic, covariate-only and combined scores of one
    # phenotype on each data split.
    #
    # Args:
    #   phe:           name of the phenotype column in `phe_f` (single string).
    #   phe_f:         phenotype table; must contain FID, IID, split, the
    #                  phenotype column and every covariate listed in
    #                  `covar_BETAs_f`.
    #   PRS_f:         zstd-compressed score file read by read_PRS().
    #   BETAs_f:       coefficient table read by read_BETAs(); only its number
    #                  of rows is used (reported as n_variables).
    #   covar_BETAs_f: covariate coefficient table read by read_covar_BETAs().
    #
    # Returns:
    #   A data frame with one row per split ('train', 'val', 'test') and the
    #   columns geno, covar, geno_covar, phe, split and n_variables.
    #
    # Example inputs:
    #   phe <- 'INI50'
    #   phe_f <- '/oak/stanford/groups/mrivas/projects/biobank-methods-dev/snpnet-elastic-net/phenotype.phe'
    #   PRS_f <- '/scratch/groups/mrivas/projects/biobank-methods-dev/snpnet-imp/INI50/INI50.sscore.zst'
    #   covar_BETAs_f <- '/scratch/groups/mrivas/projects/biobank-methods-dev/snpnet-imp/INI50/snpnet.covars.tsv'

    # Phenotype names that reduce to 'INI' or 'QT_FC' once the digits are
    # removed are evaluated with R-squared, all others with AUC.
    phe_prefix <- str_replace_all(phe, '[0-9]', '')
    metric.type <- if(phe_prefix %in% c('INI', 'QT_FC')) 'r2' else 'auc'

    covar_BETAs <- read_covar_BETAs(covar_BETAs_f)

    # Row names '<FID>_<IID>' carry the individual IDs through the matrix
    # product performed in compute_covar_scores().
    phe_df <- fread(phe_f, colClasses=c('FID'='character', 'IID'='character')) %>%
    mutate(ID = paste(FID, IID, sep='_')) %>%
    column_to_rownames('ID')

    PRS_df <- read_PRS(PRS_f)

    n_variables <- nrow(read_BETAs(BETAs_f))

    phe_cols <- phe_df %>% select(FID, IID, split, all_of(phe))

    df <- phe_df %>%
    compute_covar_scores(covar_BETAs) %>%
    left_join(phe_cols, by=c('FID', 'IID')) %>%
    left_join(PRS_df, by=c('FID', 'IID')) %>%
    mutate(
        geno_covar_score = geno_score + covar_score
    ) %>%
    drop_na(all_of(phe)) %>%
    # Compare the phenotype COLUMN with the missing-value code. A bare `phe`
    # here would be the phenotype name (a string), making the test always
    # TRUE and leaving the -9 rows in the data.
    filter(.data[[phe]] != -9) %>%
    rename('phe' = all_of(phe))

    c('train', 'val', 'test') %>%
    lapply(function(s){
        df %>% filter(split == s) %>% build_eval_df(metric.type) %>%
        mutate(
            phe = phe,
            split = s,
            n_variables = n_variables
        )
    }) %>% bind_rows()
}

In [39]:
# Evaluate INI50 with the scores and coefficients from the snpnet-imp run.
eval_df_INI50_imp <- eval_main(
    phe           = 'INI50',
    phe_f         = '/oak/stanford/groups/mrivas/projects/biobank-methods-dev/snpnet-elastic-net/phenotype.phe',
    PRS_f         = '/scratch/groups/mrivas/projects/biobank-methods-dev/snpnet-imp/INI50/INI50.sscore.zst',
    BETAs_f       = '/scratch/groups/mrivas/projects/biobank-methods-dev/snpnet-imp/INI50/snpnet.tsv',
    covar_BETAs_f = '/scratch/groups/mrivas/projects/biobank-methods-dev/snpnet-imp/INI50/snpnet.covars.tsv'
)


In [40]:
# Display the per-split evaluation table stored in eval_df_INI50_imp.
eval_df_INI50_imp

geno,covar,geno_covar,phe,split,n_variables
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<int>
0.2563138,0.5337935,0.7737875,INI50,train,28402
0.164444,0.529842,0.6946285,INI50,val,28402
0.1623798,0.5328445,0.6962549,INI50,test,28402


In [41]:
# Evaluate INI50 with the scores and coefficients from the
# snpnet-elastic-net INI50_0.9 run.
eval_df_INI50_elnet09 <- eval_main(
    phe           = 'INI50',
    phe_f         = '/oak/stanford/groups/mrivas/projects/biobank-methods-dev/snpnet-elastic-net/phenotype.phe',
    PRS_f         = '/scratch/groups/mrivas/projects/biobank-methods-dev/snpnet-elastic-net/INI50_0.9//INI50.sscore.zst',
    BETAs_f       = '/scratch/groups/mrivas/projects/biobank-methods-dev/snpnet-elastic-net/INI50_0.9//snpnet.tsv',
    covar_BETAs_f = '/scratch/groups/mrivas/projects/biobank-methods-dev/snpnet-elastic-net/INI50_0.9//snpnet.covars.tsv'
)


In [42]:
# Display the per-split evaluation table stored in eval_df_INI50_elnet09.
eval_df_INI50_elnet09

geno,covar,geno_covar,phe,split,n_variables
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<int>
0.302242,0.5337417,0.8281844,INI50,train,48256
0.1638982,0.5298043,0.6990818,INI50,val,48256
0.1617773,0.5327939,0.6998016,INI50,test,48256
