In [1]:
suppressWarnings(suppressPackageStartupMessages({
    library(tidyverse)
    library(data.table)
}))


In [2]:
# ---- Inputs ----
# Phenotype table; read with FID/IID as character and expected to carry a
# 'split' column plus one column per phenotype ID.
phe_f <- "/oak/stanford/groups/mrivas/projects/biobank-methods-dev/snpnet-elastic-net/phenotype.phe"
# Directory with per-phenotype '<phe>.sscore.zst' and '<phe>.snpRes.plink.tsv' files.
PRS_d <- "/oak/stanford/groups/mrivas/projects/biobank-methods-dev/snpnet-SBayesR/SBayesR-exclude-mhc"
# Directory with per-phenotype '<phe>.covar.scores.tsv' files.
covar_score_d <- "/oak/stanford/groups/mrivas/projects/biobank-methods-dev/snpnet-PRScs/covar_betas_train_val"

# ---- Constants ----
# Covariate names: age, sex and the first ten principal components (PC1..PC10).
covars <- c("age", "sex", sprintf("PC%d", 1:10))

# ---- Output ----
# Tab-separated evaluation table, written relative to the working directory.
out_f <- "SBayesR-exclude-mhc.eval.tsv"


In [3]:
#' Load a table of per-variant effect sizes.
#'
#' @param beta_f Path handed to `fread()`, which auto-detects the delimiter
#'   and header.
#' @return The table as returned by `fread()`.
read_BETAs <- function(beta_f) {
    betas <- fread(beta_f)
    betas
}


In [4]:
#' Read a zstd-compressed score file and keep the per-individual score sum.
#'
#' The file is streamed through `zstdcat`; only the `#FID`, `IID` and
#' `SCORE1_SUM` columns are loaded (presumably PLINK2 `--score` output --
#' TODO confirm).
#'
#' @param sscore_f Path to the `.sscore.zst` file.
#' @return A table with columns `FID`, `IID` (both character) and `geno_score`.
read_PRS <- function(sscore_f){
    fread(
        # Quote the path so spaces or shell metacharacters in it cannot break
        # (or alter) the command. path.expand() keeps a leading '~' working,
        # because the shell does not expand '~' inside quotes.
        cmd=paste('zstdcat', shQuote(path.expand(sscore_f))),
        select=c('#FID', 'IID', 'SCORE1_SUM'),
        # IDs are read as character so they are not coerced to numbers and
        # join cleanly against other character-keyed tables.
        colClasses=c('#FID'='character', 'IID'='character')
    ) %>%
    rename('FID'='#FID', 'geno_score'='SCORE1_SUM')
}


In [5]:
#' Read the per-individual covariate-only score.
#'
#' The file is streamed through `zstdcat`; only the `#FID`, `IID` and
#' `Estimate` columns are loaded.
#'
#' @param covar_score_f Path to the covariate score file.
#' @return A table with columns `FID`, `IID` (both character) and
#'   `covar_score`.
read_covar_score <- function(covar_score_f){
    fread(
        # Quote the path so spaces or shell metacharacters in it cannot break
        # (or alter) the command. path.expand() keeps a leading '~' working,
        # because the shell does not expand '~' inside quotes.
        cmd=paste('zstdcat', shQuote(path.expand(covar_score_f))),
        select=c('#FID', 'IID', 'Estimate'),
        # IDs are read as character so they are not coerced to numbers and
        # join cleanly against other character-keyed tables.
        colClasses=c('#FID'='character', 'IID'='character')
    ) %>%
    rename('FID'='#FID', 'covar_score'='Estimate')
}


In [6]:
#' Score a predictor against an observed response.
#'
#' @param response Observed phenotype values. For the AUC branch they are
#'   shifted by -1 before being turned into class labels (assumes a 1/2
#'   control/case coding -- TODO confirm).
#' @param pred Predicted score, same length as `response`.
#' @param metric.type `'r2'` selects the R-squared of `response ~ 1 + pred`;
#'   any other value selects the ROC AUC computed with ROCR.
#' @return A single numeric value (R-squared or AUC).
perform_eval <- function(response, pred, metric.type){
    if (metric.type == 'r2') {
        linear_fit <- lm(response ~ 1 + pred)
        return(summary(linear_fit)$r.squared)
    }

    # Alternative left disabled in the original code:
    #   pROC::auc(pROC::roc(response, pred))
    class_labels <- factor(response - 1)
    rocr_pred <- ROCR::prediction(pred, class_labels)
    rocr_auc <- ROCR::performance(rocr_pred, measure = 'auc')
    rocr_auc@y.values[[1]]
}


In [7]:
# Load the phenotype table with FID/IID as character, then index its rows by
# the combined "<FID>_<IID>" identifier. local() keeps the intermediate table
# out of the global environment.
phe_df <- local({
    phe_tbl <- fread(phe_f, colClasses = c("FID" = "character", "IID" = "character"))
    phe_tbl <- mutate(phe_tbl, ID = paste(FID, IID, sep = "_"))
    column_to_rownames(phe_tbl, "ID")
})


In [8]:
# Evaluate, for every phenotype and every data split, the predictive
# performance of (a) the genotype score, (b) the covariate score and
# (c) their sum. The result has one row per (phenotype, split).
eval_df <- c('INI50', 'INI21001', 'HC269', 'HC382') %>%
lapply(function(phe){
    # Phenotype IDs whose non-digit part is 'INI' or 'QT_FC' are scored with
    # R-squared; everything else is scored with AUC.
    # `phe` is a scalar here, so a plain if/else replaces ifelse().
    metric.type <- if (str_replace_all(phe, '[0-9]', '') %in% c('INI', 'QT_FC')) 'r2' else 'auc'

    # The number of rows in the effect-size table does not depend on the
    # split, so read the file once per phenotype rather than once per split.
    n_variables <- read_BETAs(
        file.path(PRS_d, sprintf('%s.snpRes.plink.tsv', phe))
    ) %>% nrow()

    # From here on the data frame has a *column* called `phe` (the phenotype
    # values), while the closure variable `phe` still holds the phenotype ID
    # string. Inside drop_na()/filter() the column takes precedence.
    df <- phe_df %>% 
    select(all_of(c('FID', 'IID', phe, 'split'))) %>%
    rename(!!'phe' := all_of(phe)) %>%
    left_join(
        read_PRS(file.path(PRS_d, sprintf('%s.sscore.zst', phe))),
        by=c("FID", "IID")
    ) %>%
    left_join(
        # NOTE(review): this file name has no '.zst' suffix although
        # read_covar_score() pipes it through zstdcat -- confirm intended.
        read_covar_score(file.path(covar_score_d, sprintf('%s.covar.scores.tsv', phe))), 
        by=c("FID", "IID")
    ) %>%
    mutate(geno_covar_score = geno_score + covar_score) %>%
    drop_na(phe) %>%
    # -9 is presumably the missing-phenotype code -- TODO confirm.
    filter(phe != -9)

    c('train', 'val', 'test') %>%
    lapply(function(split_string){
        score_test_df <- df %>%
        filter(split == split_string)

        data.frame(
            phe     = phe,
            n_variables = n_variables,
            geno       = perform_eval(
                score_test_df$phe,
                score_test_df$geno_score,
                metric.type
            ),
            covar      = perform_eval(
                score_test_df$phe,
                score_test_df$covar_score,
                metric.type
            ),
            geno_covar = perform_eval(
                score_test_df$phe,
                score_test_df$geno_covar_score,
                metric.type
            ),
            split=split_string,
            stringsAsFactors = FALSE
        )    
    }) %>% bind_rows()
}) %>% bind_rows()


In [9]:
# Write the evaluation table as an unquoted, tab-separated file; missing
# values are written as the literal string "NA".
fwrite(eval_df, out_f, sep = "\t", na = "NA", quote = FALSE)
