# PRS map - summary plots

Yosuke Tanigawa


## library, functions, and constants

In [1]:
suppressWarnings(suppressPackageStartupMessages({
    library(tidyverse)
    library(data.table)
}))


In [2]:
# Evaluate 'paths.sh' as R code. Presumably it defines the path variables used
# in this notebook (cud4_d, snpnet_helper, traits_w_metrics_f,
# eval_fullwDelta_f) — TODO confirm against paths.sh.
source('paths.sh')
# Load the development package located at cud4_d.
devtools::load_all(cud4_d)
# Load the package rooted two directory levels above the snpnet_helper file.
devtools::load_all(dirname(dirname(snpnet_helper)))
# Source the helper script itself; fit_to_df(), used further down, is not
# defined in this notebook and presumably comes from here or from one of the
# loaded packages — TODO confirm.
source(snpnet_helper)


ℹ Loading cud4

ℹ Loading snpnet



In [3]:
# Per-trait annotation table. Column names that start with '#' (e.g. a
# commented header line) get the first '#' removed.
traits_w_metrics_df <- rename_with(
    fread(traits_w_metrics_f),
    function(col_name){str_replace(col_name, '#', '')},
    starts_with("#")
)


# Long-format evaluation table, read and cleaned the same way.
eval_long_df <- rename_with(
    fread(eval_fullwDelta_f),
    function(col_name){str_replace(col_name, '#', '')},
    starts_with("#")
)


In [4]:
# Lookup table for the four non-WB ancestry groups.
#
# Returns a data.frame with one row per group and three columns:
#   split       - the key used in the `split` column of the evaluation tables
#   split_plot  - the human-readable label used in figures
#   split_order - integer 1..4 giving the display order
# All string columns are kept as character (never converted to factors).
get_split_order_df <- function(){
    data.frame(
        split = c('non_british_white', 's_asian', 'e_asian', 'african'),
        split_plot = c('Non-British white', 'South Asian', 'East Asian', 'African'),
        split_order = 1:4,
        # spelled out as FALSE: `F` is an ordinary variable that can be reassigned
        stringsAsFactors = FALSE
    )
}


In [5]:
# Colour lookup for the four ancestry groups.
#
# Returns the 'gray', 'purple', 'sky_blue' and 'orange' entries of
# cud4_colors(), re-named (in that order) with the figure labels
# 'Non-British white', 'South Asian', 'East Asian' and 'African'.
get_split_color_dict <- function(){
    palette_keys <- c('gray', 'purple', 'sky_blue', 'orange')
    group_labels <- c('Non-British white', 'South Asian', 'East Asian', 'African')
    # pick each palette entry separately, then concatenate them in order
    picked <- do.call(
        c,
        lapply(palette_keys, function(key){ cud4_colors()[key] })
    )
    setNames(picked, group_labels)
}


In [12]:
# Transferability measure: the metric observed in an ancestry group minus the
# same metric observed in the white British (WB) test set.
#
#   ancestry_group_metric - numeric value(s) for the ancestry group
#   WB_test_set_metric    - numeric value(s) for the WB test set
# Returns the element-wise difference (negative = worse than WB test).
transferability_eval_func <- function(ancestry_group_metric, WB_test_set_metric){
    delta <- ancestry_group_metric - WB_test_set_metric
    delta
}


In [173]:
# Build the long table of "ancestry group minus WB test" differences.
# One row per (trait, metric, model, ancestry group) with
#   delta_WBtest    - the value observed in the WB test split
#   delta_vs_WBtest - ancestry-group value minus the WB test value
# plus trait annotations and the display label/order of the ancestry group.
transferability_df <- eval_long_df %>%
    # keep the WB test split and the four ancestry groups
    filter(
        split %in% c('test', 'non_british_white', 'african', 's_asian', 'e_asian')
    ) %>%
    select(all_of(c("trait", "split", "metric", "model", "value"))) %>%
    # one column per split so that each group can be compared against `test`
    pivot_wider(names_from = "split", values_from = "value") %>%
    # overwrite each ancestry-group column with its difference from `test`
    mutate(across(
        c('non_british_white', 'african', 's_asian', 'e_asian'),
        function(group_value){ transferability_eval_func(group_value, test) }
    )) %>%
    # back to long format: one row per ancestry group
    pivot_longer(
        all_of(c('non_british_white', 'african', 's_asian', 'e_asian')),
        names_to = "split",
        values_to = "delta_vs_WBtest"
    ) %>%
    drop_na(delta_vs_WBtest) %>%
    rename(delta_WBtest = test) %>%
    # attach the per-trait annotations
    left_join(
        unique(select(
            traits_w_metrics_df,
            all_of(c(
                'trait', 'trait_name', 'trait_category',
                'trait_category_plot', 'trait_label',
                'family', 'WBtest_is_significant'
            ))
        )),
        by = "trait"
    ) %>%
    # attach the plotting label and display order of each ancestry group
    left_join(get_split_order_df(), by = "split")


In [174]:
# Sanity check: number of significant traits per ancestry group and model
# family among the "delta" rows for the R2-type metrics (r2 / NagelkerkeR2).
# Shown as a split x family table.
transferability_df %>%
filter(
    WBtest_is_significant,
    metric %in% c("r2", "NagelkerkeR2"),
    model == "delta"
) %>%
count(split, family) %>%
# pivot_wider() replaces the superseded spread(); names_sort keeps the
# family columns in sorted order, as spread() did
pivot_wider(names_from = family, values_from = n, names_sort = TRUE)


split,binomial,gaussian
<chr>,<int>,<int>
african,244,569
e_asian,229,569
non_british_white,244,569
s_asian,243,569


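These counts are consistent with the number of non-missing `full`-model evaluations per ancestry group and family in `eval_long_df`, computed below: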

In [175]:
# Cross-check against the raw evaluation table: number of significant traits
# with a non-missing "full"-model value per ancestry group and model family
# for the R2-type metrics (r2 / NagelkerkeR2).
eval_long_df %>%
drop_na(value) %>%
filter(
    WBtest_is_significant,
    model %in% c('full'),
    split %in% c('non_british_white', 'african', 's_asian', 'e_asian'),
    metric %in% c('r2', 'NagelkerkeR2')
) %>%
count(split, family) %>%
# pivot_wider() replaces the superseded spread(); names_sort keeps the
# family columns in sorted order, as spread() did
pivot_wider(names_from = family, values_from = n, names_sort = TRUE)


split,binomial,gaussian
<chr>,<int>,<int>
african,244,569
e_asian,229,569
non_british_white,244,569
s_asian,243,569


### Median of the difference in incremental predictive performance (other ancestry group vs. white British test set)

In [179]:
# Median difference from the WB test set, per model family, metric and
# ancestry group, over significant traits (model == "delta" rows only).
transferability_median_df <- transferability_df %>%
    filter(WBtest_is_significant, model == "delta") %>%
    group_by(family, metric, split) %>%
    summarise(median = median(delta_vs_WBtest), .groups = 'drop')


In [180]:
# add non-biomarker traits only analysis
#
# The medians of the r2 metric restricted to traits whose trait_category is
# not "Biomarkers" are appended under the synthetic metric name
# "r2noBiomarkers". Any "r2noBiomarkers" rows already present are dropped
# first so that re-running this cell does not append duplicates (duplicates
# would give the plot labels and the wide summary table more than one value
# per group).
transferability_median_df <- transferability_median_df %>%
    filter(!(metric %in% "r2noBiomarkers")) %>%
    bind_rows(
        transferability_df %>%
        filter(trait_category != "Biomarkers") %>%
        filter(
            WBtest_is_significant,
            model == "delta",
            metric == "r2"
        ) %>%
        mutate(metric = "r2noBiomarkers") %>%
        group_by(family, metric, split) %>%
        summarise(median = median(delta_vs_WBtest), `.groups` = 'drop')
    )


In [181]:
# Display the medians per family, metric and ancestry group.
transferability_median_df


family,metric,split,median
<chr>,<chr>,<chr>,<dbl>
binomial,auc,african,-0.0236069529
binomial,auc,e_asian,-0.0136897119
binomial,auc,non_british_white,-0.0048809882
binomial,auc,s_asian,-0.0132231359
binomial,NagelkerkeR2,african,-0.0106352756
binomial,NagelkerkeR2,e_asian,-0.0077651806
binomial,NagelkerkeR2,non_british_white,-0.0017294751
binomial,NagelkerkeR2,s_asian,-0.0065753702
binomial,TjurR2,african,-0.0034546243
binomial,TjurR2,e_asian,-0.0021750431


In [182]:
# Wide view of the medians: one row per ancestry group, one column per
# family_metric combination, rows ordered by the display order of the groups.
transferability_median_df %>%
    pivot_wider(names_from = c("family", "metric"), values_from = "median") %>%
    left_join(get_split_order_df(), by = "split") %>%
    arrange(split_order)

split,binomial_auc,binomial_NagelkerkeR2,binomial_TjurR2,gaussian_r2,gaussian_r2noBiomarkers,split_plot,split_order
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<int>
non_british_white,-0.004880988,-0.001729475,-0.0003712097,-0.001433456,-0.001602946,Non-British white,1
s_asian,-0.013223136,-0.00657537,-0.0020813761,-0.011471158,-0.011654874,South Asian,2
e_asian,-0.013689712,-0.007765181,-0.0021750431,-0.008200746,-0.008518088,East Asian,3
african,-0.023606953,-0.010635276,-0.0034546243,-0.01942201,-0.019701499,African,4


### Get the regression slopes

In [184]:
# Regression slopes of "metric in the ancestry group" on "metric in the WB
# test set", one no-intercept fit per (metric, ancestry group).
#
# The response (delta_WBtest + delta_vs_WBtest) is the ancestry-group value
# itself, because delta_vs_WBtest was defined as group value minus WB test
# value. Fits use glm() with its default family and are tabulated with
# fit_to_df() (not defined in this notebook; presumably provided by the
# sourced helper — TODO confirm).
#
# The non-biomarker analysis is handled by appending a relabelled copy of the
# r2 rows (metric = "r2noBiomarkers") to the regression input. Counting `n`
# from that same input means the r2noBiomarkers rows also receive their
# `family` and `n`; counting from transferability_df alone would leave them
# NA, because that table never contains the synthetic metric name.
regression_slopes_df <- local({
    # rows eligible for the regressions
    delta_df <- transferability_df %>%
        filter(WBtest_is_significant, model == "delta")

    # main analysis + non-biomarker traits-only analysis (r2 only)
    analysis_df <- bind_rows(
        delta_df,
        delta_df %>%
            filter(trait_category != "Biomarkers", metric == "r2") %>%
            mutate(metric = "r2noBiomarkers")
    )

    ancestry_groups <- c('non_british_white', 'african', 's_asian', 'e_asian')

    lapply(unique(analysis_df$metric), function(metric_selected){
        lapply(ancestry_groups, function(pop){
            glm(
                (delta_WBtest + delta_vs_WBtest) ~ 0 + delta_WBtest,
                data = analysis_df %>%
                    filter(metric == metric_selected, split == pop)
            ) %>%
            fit_to_df() %>%
            mutate(split = pop, metric = metric_selected)
        }) %>% bind_rows()
    }) %>% bind_rows() %>%
    # number of traits behind each fit, and the model family of the metric
    left_join(
        count(analysis_df, metric, family, split),
        by = c("split", "metric")
    ) %>%
    left_join(
        get_split_order_df(),
        by = "split"
    ) %>%
    arrange(family, metric, split_order)
})


In [185]:
# Display the columns of the regression table that matter for reporting.
select(
    regression_slopes_df,
    all_of(c(
        "family", "metric", "split", "n",
        "variable", "estimate", "SE", "z_or_t_value", "P"
    ))
)


family,metric,split,n,variable,estimate,SE,z_or_t_value,P
<chr>,<chr>,<chr>,<int>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
binomial,auc,non_british_white,244.0,delta_WBtest,0.59823332,0.013207264,45.2957805,1.8012420000000003e-120
binomial,auc,s_asian,242.0,delta_WBtest,0.09520928,0.019663027,4.8420461,2.2984e-06
binomial,auc,e_asian,218.0,delta_WBtest,0.01120164,0.024822109,0.4512768,0.6522408
binomial,auc,african,241.0,delta_WBtest,-0.08594696,0.018821708,-4.5663742,7.933224e-06
binomial,NagelkerkeR2,non_british_white,244.0,delta_WBtest,0.80420983,0.009826782,81.8385775,6.888882e-179
binomial,NagelkerkeR2,s_asian,243.0,delta_WBtest,0.02695902,0.018509827,1.4564707,0.1465588
binomial,NagelkerkeR2,e_asian,229.0,delta_WBtest,0.05885148,0.035447013,1.6602662,0.09823555
binomial,NagelkerkeR2,african,244.0,delta_WBtest,-0.14497497,0.015675375,-9.2485806,1.2086510000000001e-17
binomial,TjurR2,non_british_white,244.0,delta_WBtest,0.85503002,0.010261889,83.3209165,1.020125e-180
binomial,TjurR2,s_asian,243.0,delta_WBtest,-0.02857201,0.014965142,-1.9092377,0.05741414


## Violin plots

In [108]:
# Violin + jitter plot of the difference from the WB test set
# (delta_vs_WBtest), one violin per ancestry group, with the median drawn as
# a black crossbar and printed as a text label.
#
# Arguments
#   transferability_df - table with columns WBtest_is_significant, model,
#                        metric, split_plot, split_order, delta_vs_WBtest
#   metric_selected    - metric to plot; also used to look up the medians
#   ypos               - y coordinates of the four median labels, in the
#                        display order of the ancestry groups (x = 1..4)
#   median_df          - table with columns split, metric, median that
#                        supplies the label values; defaults to the global
#                        transferability_median_df, as before
# Returns a ggplot object.
plot_transferability_violin <- function(
    transferability_df, metric_selected, ypos = c(.10, .10, .10, .10),
    median_df = transferability_median_df
){
    # ancestry-group keys in display order; position i on the x axis is the
    # group with split_order == i
    split_order_df <- get_split_order_df()
    split_keys <- split_order_df$split[order(split_order_df$split_order)]

    plt_obj <- transferability_df %>%
    filter(
        WBtest_is_significant,
        model == "delta",
        metric == metric_selected
    ) %>%
    ggplot(aes(
        x=reorder(split_plot, split_order),
        y=delta_vs_WBtest,
        color=reorder(split_plot, split_order)
    )) +
    geom_hline(yintercept = 0, color='gray') +
    geom_violin() +
    geom_jitter(height = 0, width = 0.3) +
    theme_bw(base_size = 16) +
    scale_color_manual(values=get_split_color_dict()) +
    labs(
        x = 'Ancestry group in UK Biobank'
    ) +
    theme(legend.position = 'none') +
    stat_summary(
        fun = "median",
        geom = "crossbar",
        width = 0.8,
        color = "black"
    )

    # one text label per ancestry group showing its median, rounded to 5 digits
    for(idx in seq_along(split_keys)){
        median_label <- median_df %>%
        filter(
            split == split_keys[[idx]],
            metric == metric_selected
        ) %>% pull(median) %>% round(5)

        plt_obj <- plt_obj + annotate(
            geom="text", x = idx, y = ypos[idx], color="black", size = 6,
            label = median_label
        )
    }

    plt_obj
}



In [111]:
# Violin plot of the r2 differences for the gaussian-family traits.
p_trans_violin_gaussian <- plot_transferability_violin(
    filter(transferability_df, family == "gaussian"),
    "r2"
) +
    labs(
        title = 'Quantitative traits (Gaussian model)',
        y = latex2exp::TeX("The difference in incremental \\textit{R}$^2$ compared against white British")
    )

In [186]:
# Same violin plot restricted to non-biomarker traits. The r2 rows are
# relabelled "r2noBiomarkers" so that the plotting function looks up the
# medians stored under that metric name.
p_trans_violin_gaussian_noBiomarkers <- plot_transferability_violin(
    transferability_df %>%
        filter(family == "gaussian", trait_category != "Biomarkers") %>%
        mutate(metric = if_else(metric == "r2", "r2noBiomarkers", metric)),
    "r2noBiomarkers"
) +
    labs(
        title = 'Quantitative traits (Gaussian model)\n(non-Biomarker traits only)',
        y = latex2exp::TeX("The difference in incremental \\textit{R}$^2$ compared against white British")
    )


In [112]:
# Violin plot of the NagelkerkeR2 differences for the binomial-family traits.
p_trans_violin_NagelkerkeR2 <- plot_transferability_violin(
    filter(transferability_df, family == "binomial"),
    "NagelkerkeR2"
) +
    labs(
        title = 'Binary traits (Binomial model)',
        y = latex2exp::TeX("The difference in incremental Nagelkerke's \\textit{R}$^2$ compared against white British")
    )

In [113]:
# Violin plot of the auc differences for the binomial-family traits, with
# the median labels placed at custom heights.
p_trans_violin_AUC <- plot_transferability_violin(
    filter(transferability_df, family == "binomial"),
    "auc",
    c(.10, .05, .05, .05)
) +
    labs(
        title = 'Binary traits (Binomial model)',
        y = latex2exp::TeX("The difference in incremental AUC compared against white British")
    )

In [114]:
# Violin plot of the TjurR2 differences for the binomial-family traits, with
# the median labels placed at custom heights.
p_trans_violin_TjurR2 <- plot_transferability_violin(
    filter(transferability_df, family == "binomial"),
    "TjurR2",
    c(.05, .05, .05, .05)
) +
    labs(
        title = 'Binary traits (Binomial model)',
        y = latex2exp::TeX("The difference in incremental Tjur's \\textit{R}$^2$ compared against white British")
    )

## Scatter plots

In [133]:
# Scatter plot of the metric in each ancestry group (y) against the metric in
# the WB test set (x), with one dashed no-intercept regression line and one
# "y = <slope>x" label per ancestry group.
#
# Arguments
#   transferability_df - table with columns WBtest_is_significant, model,
#                        metric, split_plot, split_order, delta_WBtest,
#                        delta_vs_WBtest
#   metric_selected    - metric to plot; also used to look up the slopes
#   xpos_base          - x coordinate at which the slope labels are placed
#   ypos               - vertical offsets added to the label positions, in
#                        the order of names(get_split_color_dict())
#   slopes_df          - table with columns metric, split_plot, estimate that
#                        supplies the slopes; defaults to the global
#                        regression_slopes_df, as before
# Returns a ggplot object.
plot_transferability_scatter <- function(
    transferability_df, metric_selected,
    xpos_base = 0.5,
    ypos = c(.0, .0, .0, .0),
    slopes_df = regression_slopes_df
){
    # colour lookup is loop-invariant, so build it once
    split_colors <- get_split_color_dict()

    plt_obj <- transferability_df %>%
    filter(
        WBtest_is_significant,
        model == "delta",
        metric == metric_selected
    ) %>%
    ggplot(aes(
        x=delta_WBtest,
        # group value = WB test value + difference from the WB test value
        y=delta_vs_WBtest + delta_WBtest,
        color=reorder(split_plot, split_order)
    ))

    # regression line and slope label for each ancestry group; these layers
    # are added before geom_point(), so the points are drawn on top of them
    for(idx in seq_along(split_colors)){
        split_plot_str <- names(split_colors)[[idx]]
        this_slope <- slopes_df %>%
        filter(
            metric == metric_selected,
            split_plot == split_plot_str
        ) %>%
        pull(estimate)
        plt_obj <- plt_obj + geom_abline(
            color=split_colors[[split_plot_str]],
            linetype = "dashed",
            intercept=0,
            slope = this_slope
        ) +
        annotate(
            geom="text",
            x = xpos_base,
            # on the fitted line at xpos_base, shifted by the per-group offset
            y = xpos_base * this_slope + ypos[idx],
            color="black",
            size = 6,
            label = sprintf('y = %.03fx', this_slope)
        )
    }

    plt_obj +
    geom_point() +
    theme_bw(base_size = 16) +
    scale_color_manual(values=split_colors) +
    labs(
        color = 'Ancestry group in UK Biobank'
    ) +
    theme(legend.position = c(.28,.85))
}


In [145]:
# Scatter plot of r2 (ancestry group vs. WB test) for gaussian-family traits.
p_trans_scatter_gaussian <- plot_transferability_scatter(
    filter(transferability_df, family == "gaussian"),
    "r2",
    xpos_base = .3,
    ypos = c(0, 0.01, -0.01, 0)
) +
    labs(
        title = 'Quantitative traits (Gaussian model)',
        x = latex2exp::TeX('Incremental \\textit{R}$^2$ in white British'),
        y = latex2exp::TeX('Incremental \\textit{R}$^2$')
    )


In [188]:
# Same scatter plot restricted to non-biomarker traits. The r2 rows are
# relabelled "r2noBiomarkers" so that the plotting function looks up the
# slopes stored under that metric name.
p_trans_scatter_gaussian_noBiomarkers <- plot_transferability_scatter(
    transferability_df %>%
        filter(family == "gaussian", trait_category != "Biomarkers") %>%
        mutate(metric = if_else(metric == "r2", "r2noBiomarkers", metric)),
    "r2noBiomarkers",
    xpos_base = .3,
    ypos = c(0, 0.01, -0.01, 0)
) +
    labs(
        title = 'Quantitative traits (Gaussian model)\n(non-Biomarker traits only)',
        x = latex2exp::TeX('Incremental \\textit{R}$^2$ in white British'),
        y = latex2exp::TeX('Incremental \\textit{R}$^2$')
    )


In [146]:
# Scatter plot of NagelkerkeR2 (ancestry group vs. WB test) for
# binomial-family traits.
p_trans_scatter_NagelkerkeR2 <- plot_transferability_scatter(
    filter(transferability_df, family == "binomial"),
    "NagelkerkeR2",
    ypos = c(0, -0.03, 0.02, 0)
) +
    labs(
        title = 'Binary traits (Binomial model)',
        x = latex2exp::TeX("Incremental Nagelkerke's \\textit{R}$^2$ in white British"),
        y = latex2exp::TeX("Incremental Nagelkerke's \\textit{R}$^2$")
    )

In [162]:
# Scatter plot of TjurR2 (ancestry group vs. WB test) for binomial-family
# traits.
p_trans_scatter_TjurR2 <- plot_transferability_scatter(
    filter(transferability_df, family == "binomial"),
    "TjurR2",
    xpos_base = .35,
    ypos = c(0, 0.025, -0.03, 0.0)
) +
    labs(
        title = 'Binary traits (Binomial model)',
        x = latex2exp::TeX("Incremental Tjur's \\textit{R}$^2$ in white British"),
        y = latex2exp::TeX("Incremental Tjur's \\textit{R}$^2$")
    )

In [152]:
# Scatter plot of auc (ancestry group vs. WB test) for binomial-family traits.
p_trans_scatter_AUC <- plot_transferability_scatter(
    filter(transferability_df, family == "binomial"),
    "auc",
    xpos_base = .35,
    ypos = c(0, 0.01, 0, 0)
) +
    labs(
        title = 'Binary traits (Binomial model)',
        x = latex2exp::TeX("Incremental AUC in white British"),
        y = latex2exp::TeX("Incremental AUC")
    )

In [191]:
# Write the figures to the "plots" directory, each as PNG and as PDF.
# The directory is created first if it is missing: ggsave() does not create
# it by default and fails in a non-interactive run when it does not exist.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)

for(ext in c('png', 'pdf')){
    # main figure: 2 x 2 grid, binary traits on top (A, B),
    # quantitative traits below (C, D)
    ggsave(
        file.path("plots", sprintf('transferability.%s', ext)),
        gridExtra::arrangeGrob(
            p_trans_violin_NagelkerkeR2 +
            labs(title = '(A) Binary traits (Binomial model)'),
            p_trans_scatter_NagelkerkeR2 +
            labs(title = '(B) Binary traits (Binomial model)'),
            p_trans_violin_gaussian +
            labs(title = '(C) Quantitative traits (Gaussian model)'),
            p_trans_scatter_gaussian +
            labs(title = '(D) Quantitative traits (Gaussian model)'),
            ncol=2
        ),
        width=16, height=16
    )
    # quantitative traits, non-biomarker traits only
    ggsave(
        file.path("plots", sprintf('transferability_gaussian_noBiomarkers.%s', ext)),
        gridExtra::arrangeGrob(
            p_trans_violin_gaussian_noBiomarkers,
            p_trans_scatter_gaussian_noBiomarkers,
            ncol=2
        ),
        width=16, height=8
    )
    # binary traits, AUC
    ggsave(
        file.path("plots", sprintf('transferability_AUC.%s', ext)),
        gridExtra::arrangeGrob(
            p_trans_violin_AUC,
            p_trans_scatter_AUC,
            ncol=2
        ),
        width=16, height=8
    )
    # binary traits, Tjur's R2
    ggsave(
        file.path("plots", sprintf('transferability_Tjur.%s', ext)),
        gridExtra::arrangeGrob(
            p_trans_violin_TjurR2,
            p_trans_scatter_TjurR2,
            ncol=2
        ),
        width=16, height=8
    )
}
