# PRS map - summary plots - histogram of incremental predictive performance

Yosuke Tanigawa


## library, functions, and constants

In [1]:
suppressWarnings(suppressPackageStartupMessages({
    library(tidyverse)
    library(data.table)
}))


In [2]:
# Evaluate paths.sh as R code. Presumably this is where the path variables used
# in this notebook (cud4_d, snpnet_helper, traits_w_metrics_f,
# eval_fullwDelta_f) are defined — TODO confirm against paths.sh.
source('paths.sh')
# Load the development package located at cud4_d.
devtools::load_all(cud4_d)
# Load the package rooted two directory levels above the snpnet helper script.
devtools::load_all(dirname(dirname(snpnet_helper)))
# Evaluate the snpnet helper script itself in this session.
source(snpnet_helper)


ℹ Loading cud4

ℹ Loading snpnet



In [3]:
# Trait-level table: read the file, then remove the '#' from every column
# whose name starts with '#'.
traits_w_metrics_df <- rename_with(
    fread(traits_w_metrics_f),
    function(col_name){str_replace(col_name, '#', '')},
    starts_with("#")
)


# Long-format evaluation table: same '#'-stripping of the column names.
eval_long_df <- rename_with(
    fread(eval_fullwDelta_f),
    function(col_name){str_replace(col_name, '#', '')},
    starts_with("#")
)


## Histogram - incremental predictive performance

In [6]:
#' Histogram of incremental predictive performance across traits
#'
#' Keeps the rows of `eval_long_df` that are flagged by
#' `WBtest_is_significant`, whose `metric` equals `metric_selected`, and whose
#' `split` is "test" and `model` is "delta"; then draws a histogram of `value`
#' with a red vertical reference line at 0.
#'
#' @param eval_long_df Data frame with (at least) the columns
#'   `WBtest_is_significant`, `metric`, `split`, `model`, and `value`.
#' @param metric_selected Name of the metric to keep (compared against the
#'   `metric` column), e.g. "r2" or "auc".
#' @param bins Number of histogram bins. Defaults to 30, the value that was
#'   previously hard-coded.
#' @return A ggplot object. Only the y-axis label is set here; callers add the
#'   title and the x-axis label.
plot_incremental_predictive_performance <- function(eval_long_df, metric_selected, bins = 30){
    eval_long_df %>%
    filter(
        WBtest_is_significant,
        # .env$ forces the function argument to be used even if the data
        # frame happens to contain a column called `metric_selected`.
        metric == .env$metric_selected,
        split == "test",
        model == "delta"
    ) %>%
    ggplot(aes(x = value)) +
    # The reference line is added first, so the histogram bars are drawn on top of it.
    geom_vline(xintercept = 0, color = 'red') +
    geom_histogram(bins = bins) +
    theme_bw(base_size = 16) +
    labs(
        y = 'Number of traits'
    )
}


In [24]:
# Make sure the output directory exists before saving anything; ggsave() does
# not reliably create a missing directory on its own.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)

# Build every figure exactly once. Each entry is named after the output file
# stem and carries the plot object together with its size (in ggsave units).
# NOTE(review): the Nagelkerke and Tjur labels use four backslashes before
# `textit` whereas the Gaussian labels use two — confirm that both variants
# render as intended with the installed latex2exp version.
incremental_plots <- list(
    'incremental-R2' = list(
        plot = gridExtra::arrangeGrob(
            # Nagelkerke's pseudo-R2
            eval_long_df %>%
            filter(family == 'binomial') %>%
            plot_incremental_predictive_performance("NagelkerkeR2") +
            labs(
                title = 'Binary traits (Binomial model)',
                x = latex2exp::TeX("Incremental Nagelkerke's $\\\\textit{R}^{2}$")
            ),
            # quantitative traits (R2)
            eval_long_df %>%
            filter(family == 'gaussian') %>%
            plot_incremental_predictive_performance("r2") +
            labs(
                title = 'Quantitative traits (Gaussian model)',
                x = latex2exp::TeX('Incremental \\textit{R}$^2$')
            ),
            ncol = 2
        ),
        width = 16, height = 8
    ),
    # quantitative traits (R2), without biomarker traits
    'incremental-R2noBiomarkers' = list(
        plot = eval_long_df %>%
            filter(family == 'gaussian') %>%
            filter(trait_category != "Biomarkers") %>%
            plot_incremental_predictive_performance("r2") +
            labs(
                title = 'Quantitative traits (Gaussian model)\n(non-biomarker traits only)',
                x = latex2exp::TeX('Incremental \\textit{R}$^2$')
            ),
        width = 8, height = 8
    ),
    # binomial traits (AUC)
    'incremental-AUC' = list(
        plot = eval_long_df %>%
            filter(family == 'binomial') %>%
            plot_incremental_predictive_performance("auc") +
            labs(
                title = 'Binary traits (Binomial model)',
                x = latex2exp::TeX('Incremental AUC')
            ),
        width = 8, height = 8
    ),
    # Tjur's pseudo-R2
    'incremental-TjurR2' = list(
        plot = eval_long_df %>%
            filter(family == 'binomial') %>%
            plot_incremental_predictive_performance("TjurR2") +
            labs(
                title = 'Binary traits (Binomial model)',
                x = latex2exp::TeX("Incremental Tjur's $\\\\textit{R}^{2}$")
            ),
        width = 8, height = 8
    )
)

# Write each figure as both PNG and PDF, in the same order as before:
# all figures for one extension, then all figures for the next.
for(ext in c('png', 'pdf')){
    for(plot_name in names(incremental_plots)){
        plot_spec <- incremental_plots[[plot_name]]
        ggsave(
            file.path("plots", sprintf('%s.%s', plot_name, ext)),
            plot_spec$plot,
            width = plot_spec$width, height = plot_spec$height
        )
    }
}
