In [1]:
suppressWarnings(suppressPackageStartupMessages({
    library(tidyverse)
    library(data.table)
}))


In [2]:
# input
# Phenotype table; read further down with fread(), where its FID, IID, 'split'
# and per-phenotype columns are used.
phe_f <- '/oak/stanford/groups/mrivas/projects/biobank-methods-dev/snpnet-elastic-net/phenotype.phe'
# Directory holding the per-phenotype '<phe>.sscore.zst' score files and
# '<phe>.snpRes.plink.tsv' tables that the evaluation code reads.
PRS_d <- '/oak/stanford/groups/mrivas/projects/biobank-methods-dev/snpnet-SBayesR/SBayesR-chr_merge-exclude-mhc'
# Directory holding the per-phenotype '<phe>.covar.scores.tsv' covariate scores.
covar_score_d <- '/oak/stanford/groups/mrivas/projects/biobank-methods-dev/snpnet-PRScs/covar_betas_train_val'

# constants
# Covariate names: 'age', 'sex' and 'PC1' .. 'PC10'.
# NOTE(review): not referenced by any other code visible in this notebook;
# confirm whether it is still needed.
covars <- c('age', 'sex', paste0('PC', 1:10))

# output
# Destination of the evaluation table written by the final fwrite() call.
out_f <- 'SBayesR.eval.SBayesR-chr_merge-exclude-mhc.tsv'


In [3]:
# Load a table of per-variant results from disk.
#
# beta_f: path (or any other input accepted by data.table::fread) of the
#         table to read; presumably the SBayesR effect sizes, one row per
#         variant -- verify against the files in PRS_d.
# Returns the parsed table. The visible caller only uses its row count.
read_BETAs <- function(beta_f){
    betas <- data.table::fread(input = beta_f)
    betas
}


In [4]:
# Read the genotype score of every individual from a zstd-compressed table.
#
# sscore_f: path of the compressed score file. It must contain the columns
#           '#FID', 'IID' and 'SCORE1_SUM' (presumably PLINK 2 --score
#           output -- confirm against the generating pipeline).
# Returns a table with the columns FID and IID (both read as character so
# that the IDs are not coerced to numbers) and geno_score (the SCORE1_SUM
# column under a new name).
read_PRS <- function(sscore_f){
    fread(
        # shQuote() protects the path from the shell: without it a path that
        # contains spaces or shell metacharacters would be split or interpreted.
        cmd=paste('zstdcat', shQuote(sscore_f)),
        select=c('#FID', 'IID', 'SCORE1_SUM'),
        colClasses=c('#FID'='character', 'IID'='character')
    ) %>%
    rename('FID'='#FID', 'geno_score'='SCORE1_SUM')
}


In [5]:
# Read the covariate-only score of every individual.
#
# covar_score_f: path of the score table, streamed through `zstdcat`. It must
#                contain the columns '#FID', 'IID' and 'Estimate'.
#                NOTE(review): the visible caller passes a '.covar.scores.tsv'
#                path; confirm the file is zstd-compressed or that relying on
#                zstdcat for an uncompressed file is intended.
# Returns a table with the columns FID and IID (both read as character) and
# covar_score (the Estimate column under a new name).
read_covar_score <- function(covar_score_f){
    fread(
        # shQuote() protects the path from the shell: without it a path that
        # contains spaces or shell metacharacters would be split or interpreted.
        cmd=paste('zstdcat', shQuote(covar_score_f)),
        select=c('#FID', 'IID', 'Estimate'),
        colClasses=c('#FID'='character', 'IID'='character')
    ) %>%
    rename('FID'='#FID', 'covar_score'='Estimate')
}


In [6]:
# Score a predictor against an observed response.
#
# response:    observed phenotype values.
# pred:        predicted score, same length as `response`.
# metric.type: 'r2' selects the R-squared of the linear fit
#              `response ~ 1 + pred`; every other value selects the AUC
#              computed with ROCR.
# Returns a single number (R-squared or AUC).
perform_eval <- function(response, pred, metric.type){
    if(metric.type == 'r2'){
        linear_fit <- lm(response ~ 1 + pred)
        return(summary(linear_fit)$r.squared)
    }

#         pROC::auc(pROC::roc(response, pred))
    # Labels are shifted down by one before being turned into a factor;
    # assumes the binary response is coded 1/2 -- TODO confirm.
    labels <- factor(response - 1)
    rocr_pred <- ROCR::prediction(pred, labels)
    rocr_auc <- ROCR::performance(rocr_pred, measure = 'auc')
    rocr_auc@y.values[[1]]
}


In [8]:
# Build one row of the evaluation table for a phenotype and a data split.
#
# score_test_df: per-individual table with the columns phe (observed
#                phenotype), geno_score, covar_score and geno_covar_score.
# phe:           phenotype ID (string); also selects which
#                '<phe>.snpRes.plink.tsv' file is counted for n_variables.
# PRS_d:         directory that contains the '<phe>.snpRes.plink.tsv' file.
# metric.type:   metric selector handed to perform_eval().
# split_string:  label stored in the `split` column of the returned row.
#
# Returns a one-row data.frame with the columns phe, n_variables (number of
# rows in the snpRes table), geno, covar, geno_covar (the metric for each of
# the three scores) and split.
eval_line_build <- function(score_test_df, phe, PRS_d, metric.type, split_string){
    beta_f <- file.path(PRS_d, sprintf('%s.snpRes.plink.tsv', phe))
    data.frame(
        phe     = phe,
        n_variables = nrow(read_BETAs(beta_f)),
        geno       = perform_eval(
            score_test_df$phe,
            score_test_df$geno_score,
            metric.type
        ),
        covar      = perform_eval(
            score_test_df$phe,
            score_test_df$covar_score,
            metric.type
        ),
        geno_covar = perform_eval(
            score_test_df$phe,
            score_test_df$geno_covar_score,
            metric.type
        ),
        split=split_string,
        # FALSE rather than F: F is an ordinary variable that can be rebound.
        stringsAsFactors = FALSE
    )
}


In [7]:
# Load the phenotype table, keeping FID and IID as character, and use the
# combined '<FID>_<IID>' string as the row name of each individual.
phe_df <- data.table::fread(
    phe_f,
    colClasses = c('FID'='character', 'IID'='character')
) %>%
    dplyr::mutate(ID = paste(FID, IID, sep='_')) %>%
    tibble::column_to_rownames(var = 'ID')


In [12]:
# Evaluate every phenotype on the train+val and the test individuals and
# stack the resulting rows (two per phenotype) into one table.
eval_df <- c('INI50', 'INI21001', 'HC269', 'HC382') %>%
lapply(function(phe){
    # The phenotype ID with its digits removed selects the metric:
    # 'INI' and 'QT_FC' prefixes use R-squared, everything else uses AUC.
    phe_prefix <- str_remove_all(phe, '[0-9]')
    metric.type <- if(phe_prefix %in% c('INI', 'QT_FC')) 'r2' else 'auc'

    prs_df <- read_PRS(file.path(PRS_d, sprintf('%s.sscore.zst', phe)))
    covar_df <- read_covar_score(
        file.path(covar_score_d, sprintf('%s.covar.scores.tsv', phe))
    )

    # Attach both scores to the phenotype column (renamed to `phe`), add their
    # sum, and drop individuals whose phenotype is NA or equal to -9.
    score_df <- phe_df %>%
        select(all_of(c('FID', 'IID', phe, 'split'))) %>%
        rename(phe = all_of(phe)) %>%
        left_join(prs_df, by=c("FID", "IID")) %>%
        left_join(covar_df, by=c("FID", "IID")) %>%
        mutate(geno_covar_score = geno_score + covar_score) %>%
        drop_na(phe) %>%
        filter(phe != -9)

    train_val_df <- score_df %>% filter(split %in% c('train', 'val'))
    test_df <- score_df %>% filter(split == 'test')

    bind_rows(
        eval_line_build(train_val_df, phe, PRS_d, metric.type, 'train+val'),
        eval_line_build(test_df, phe, PRS_d, metric.type, 'test')
    )
}) %>% bind_rows()


In [13]:
# Display the assembled evaluation table (auto-printed as the cell's value).
eval_df

phe,n_variables,geno,covar,geno_covar,split
<chr>,<int>,<dbl>,<dbl>,<dbl>,<chr>
INI50,658693,0.2830452,0.53343941,0.8138138,train+val
INI50,658693,0.1663268,0.533574168,0.7011915,test
INI21001,658693,0.4797278,0.010430321,0.4853468,train+val
INI21001,658693,0.1151966,0.009921578,0.1241868,test
HC269,658693,0.8653378,0.693475033,0.872127,train+val
HC269,658693,0.6333911,0.688961985,0.7324951,test
HC382,658693,0.8869417,0.53442219,0.8837693,train+val
HC382,658693,0.6233613,0.53706763,0.6277625,test


In [14]:
# Write the evaluation table to out_f as tab-separated text, with missing
# values written as "NA" and no quoting of fields.
eval_df %>%
fwrite(out_f, sep='\t', na = "NA", quote=FALSE)
