# load packages

In [None]:
library(rlang)
library(bigreadr)
library(R.utils)
library(tidyverse)
library(tidyposterior)
library(tidymodels)
library(parsnip)
library(probably)
library(ggpubr)
library(colorspace)
library(kableExtra)
library(IRdisplay)
library(ggsci)
library(knitr)
library(gginnards)
library(irr)
library(simpleboot)
library(boot)
library(reshape2)
library(data.table)
library(eulerr)
library(patchwork)
library(ggtext)
library(rstan)
library(posterior)
library(truncnorm)
library(gridExtra)
library(cowplot)

# read in input files

## score

In [None]:
# Read the tab-delimited PGS score table, preview it, and report the number of
# distinct PGS identifiers and distinct FIDs it contains.
score <- fread(
  input = "output/PMBBv3/score/PMBBv3_pgs.txt.gz",
  sep = "\t"
)
head(score)
n_distinct(score$PGS)
n_distinct(score$FID)

## pheno/covar

In [None]:
# Read the tab-delimited phenotype/covariate table and preview the first rows.
pheno_covar <- fread(
  input = "input/PMBB_v3.CKD_PGS.YK_phenotyping.phenotype_covariates.txt",
  sep = "\t"
)
head(pheno_covar)

## pop

In [None]:
# Read the tab-delimited population-similarity table, then show its first rows
# and its column names.
pop <- fread(
  input = "output/PMBBv3/score/PMBBv3_popsimilarity.txt.gz",
  sep = "\t"
)
head(pop)
colnames(pop)

## PGS IDs grouped by training sets

In [None]:
# Headerless list of PGS IDs for the UKBB training group (per the file name).
ukbb <- read.csv(
  file = "input/CKD.PGS_list.Training.UKBB.txt",
  header = FALSE
)
head(ukbb)

In [None]:
# Headerless list of PGS IDs for the CKDGen EUR-only training group (per the
# file name).
ckdgen_eur <- read.csv(
  file = "input/CKD.PGS_list.Training.CKDGen.EUR_only.txt",
  header = FALSE
)
head(ckdgen_eur)

In [None]:
# Headerless list of PGS IDs for the CKDGen multi-ancestry training group (per
# the file name).
ckdgen_multi <- read.csv(
  file = "input/CKD.PGS_list.Training.CKDGen.multi_ancestry.txt",
  header = FALSE
)
head(ckdgen_multi)

## PRScs iteration scores

In [None]:
# Read the AFR eGFR PRScs scores, keeping only the IID, PGS and Z_norm2
# columns; preview and count the distinct PGS labels.
prscs_columns <- c("IID", "PGS", "Z_norm2")
afr_egfr <- fread(
  input = "output/AFR.eGFR.flip/PMBBv3PRScs/score/PMBBv3PRScs_pgs.txt.gz",
  sep = "\t",
  select = prscs_columns
)
head(afr_egfr)
n_distinct(afr_egfr$PGS)

In [None]:
# Read the EUR eGFR PRScs scores, keeping only the IID, PGS and Z_norm2
# columns; preview and count the distinct PGS labels.
prscs_columns <- c("IID", "PGS", "Z_norm2")
eur_egfr <- fread(
  input = "output/EUR.eGFR.flip/PMBBv3PRScs/score/PMBBv3PRScs_pgs.txt.gz",
  sep = "\t",
  select = prscs_columns
)
head(eur_egfr)
n_distinct(eur_egfr$PGS)

In [None]:
# Read part one of the META eGFR PRScs scores, keeping only the IID, PGS and
# Z_norm2 columns; preview and count the distinct PGS labels.
# The path is relative ("output/..."), like every other input in this
# notebook; the previous leading "/" pointed at the filesystem root.
meta_egfr1 <- fread(
  input = "output/META.eGFR.flip/one/PMBBv3PRScs/score/PMBBv3PRScs_pgs.txt.gz",
  sep = "\t",
  select = c("IID", "PGS", "Z_norm2")
)
head(meta_egfr1)
length(unique(meta_egfr1$PGS))

In [None]:
# Read part two of the META eGFR PRScs scores, keeping only the IID, PGS and
# Z_norm2 columns; preview and count the distinct PGS labels.
prscs_columns <- c("IID", "PGS", "Z_norm2")
meta_egfr2 <- fread(
  input = "output/META.eGFR.flip/two/PMBBv3PRScs/score/PMBBv3PRScs_pgs.txt.gz",
  sep = "\t",
  select = prscs_columns
)
head(meta_egfr2)
n_distinct(meta_egfr2$PGS)

# clean scores

## subset and rename

In [None]:
# Drop the sampleset and FID columns and strip the "_hmPOS_GRCh38" suffix from
# the PGS labels.
score_sub <- score %>%
  select(-sampleset, -FID) %>%
  mutate(PGS = gsub("_hmPOS_GRCh38", "", PGS))
head(score_sub)

In [None]:
# Rename person_id to IID (stored as character so it can serve as the join
# key) and turn CKD into a factor.
pheno_covar_rename <- pheno_covar %>%
  rename(IID = person_id) %>%
  mutate(
    IID = as.character(IID),
    CKD = as.factor(CKD)
  )
head(pheno_covar_rename)

In [None]:
# Keep only the sample ID and its most-similar population label, then list the
# labels that occur.
pop_sub <- select(pop, IID, MostSimilarPop)
unique(pop_sub[["MostSimilarPop"]])
head(pop_sub)

## merge

In [None]:
# Attach phenotype/covariates and population labels to every score row.
# Inner joins keep only IIDs present in all three tables.
all_score <- score_sub %>%
  inner_join(pheno_covar_rename, by = "IID") %>%
  inner_join(pop_sub, by = "IID")

# Row and unique-sample counts before/after the joins, to see what was dropped.
nrow(all_score)
n_distinct(all_score$IID)
nrow(score_sub)
n_distinct(score_sub$IID)
nrow(pheno_covar_rename)
nrow(pop_sub)
head(all_score)

### PRScs iterations

In [None]:
# AFR eGFR PRScs scores joined to phenotype/covariates and population labels
# (inner joins on IID).
afr_egfr_merge <- afr_egfr %>%
  inner_join(pheno_covar_rename, by = "IID") %>%
  inner_join(pop_sub, by = "IID")
head(afr_egfr_merge)

In [None]:
# EUR eGFR PRScs scores joined to phenotype/covariates and population labels
# (inner joins on IID).
eur_egfr_merge <- eur_egfr %>%
  inner_join(pheno_covar_rename, by = "IID") %>%
  inner_join(pop_sub, by = "IID")
head(eur_egfr_merge)

In [None]:
# Stack the two META eGFR score tables, then join phenotype/covariates and
# population labels (inner joins on IID).
meta_egfr <- rbind(meta_egfr1, meta_egfr2)
meta_egfr_merge <- meta_egfr %>%
  inner_join(pheno_covar_rename, by = "IID") %>%
  inner_join(pop_sub, by = "IID")
n_distinct(meta_egfr_merge$PGS)
head(meta_egfr_merge)

## subset to EUR and AFR only

In [None]:
# Rows whose MostSimilarPop label contains "EUR".
all_score_eur <- all_score %>%
  filter(grepl("EUR", MostSimilarPop, fixed = TRUE))
unique(all_score_eur[["MostSimilarPop"]])
head(all_score_eur)

In [None]:
# Rows whose MostSimilarPop label contains "AFR".
all_score_afr <- all_score %>%
  filter(grepl("AFR", MostSimilarPop, fixed = TRUE))
unique(all_score_afr[["MostSimilarPop"]])
head(all_score_afr)

# get summary data

## case/control

In [None]:
# Number of unique individuals per CKD level (one row per IID/CKD pair).
all_score %>%
  distinct(IID, CKD) %>%
  group_by(CKD) %>%
  summarise(n())

## age

In [None]:
# Summary statistics of AGE over unique IID/AGE pairs.
all_score %>%
  distinct(IID, AGE) %>%
  select(AGE) %>%
  summary()

In [None]:
# Density of AGE over unique IID/AGE pairs, saved as a PNG.
# The fill is set as a constant on the geom. Writing fill = 'red' inside aes()
# maps the literal string to the fill scale, which yields the default palette
# colour (not red) plus a legend that then has to be hidden.
age_density_plot <- all_score %>%
  distinct(IID, AGE) %>%
  ggplot(aes(x = AGE)) +
  geom_density(fill = "red", alpha = 0.5) +
  ggtitle("PMBB v3 CKD PGS Age Distribution")
age_density_plot

# Pass the plot explicitly instead of relying on last_plot().
ggsave(
  "output/plots/PMBB_v3.CKD.PGS.YK_phenotyping.all_samples.age_distribution.png",
  plot = age_density_plot
)

In [None]:
# Mean AGE (ignoring missing values) per CKD level over unique individuals.
all_score %>%
  distinct(IID, CKD, AGE) %>%
  group_by(CKD) %>%
  summarize(Mean_Age = mean(AGE, na.rm = TRUE))

In [None]:
# Overlaid AGE densities, coloured and filled by CKD level, saved as a PNG.
age_by_ckd_density_plot <- all_score %>%
  distinct(IID, CKD, AGE) %>%
  ggplot(aes(x = AGE, color = CKD, fill = CKD)) +
  geom_density(alpha = 0.5) +
  labs(title = "PMBB v3 CKD PGS Age Distribution")
age_by_ckd_density_plot

ggsave(
  "output/plots/PMBB_v3.CKD.PGS.YK_phenotyping.case_control.age_distribution.png",
  plot = age_by_ckd_density_plot
)

## sex

In [None]:
# Count and percentage of unique individuals per SEX code.
all_score %>%
  distinct(IID, SEX, CKD) %>%
  group_by(SEX) %>%
  summarize(Count = n()) %>%
  mutate(Percent = Count / sum(Count) * 100)

In [None]:
# Bar chart of SEX over unique individuals, with codes 1/2 relabelled
# Male/Female, saved as a PNG.
sex_bar_plot <- all_score %>%
  distinct(IID, SEX) %>%
  mutate(SEX = gsub("2", "Female", gsub("1", "Male", SEX))) %>%
  ggplot(aes(x = SEX, fill = SEX)) +
  geom_bar() +
  labs(title = "PMBB v3 CKD PGS Sex Distribution")
sex_bar_plot

ggsave(
  "output/plots/PMBB_v3.CKD.PGS.YK_phenotyping.all_samples.sex_distribution.png",
  plot = sex_bar_plot
)

In [None]:
# Count per CKD x SEX cell. summarize() leaves the result grouped by CKD, so
# Percent is the share of each SEX within a CKD level.
all_score %>%
  distinct(IID, SEX, CKD) %>%
  group_by(CKD, SEX) %>%
  summarize(Count = n()) %>%
  mutate(Percent = Count / sum(Count) * 100)

In [None]:
# Bar chart of SEX (codes 1/2 relabelled Male/Female) with bars split by CKD
# level, saved as a PNG.
sex_by_ckd_bar_plot <- all_score %>%
  distinct(IID, SEX, CKD) %>%
  mutate(SEX = gsub("2", "Female", gsub("1", "Male", SEX))) %>%
  ggplot(aes(x = SEX, fill = CKD)) +
  geom_bar() +
  labs(title = "PMBB v3 CKD PGS Sex Distribution")
sex_by_ckd_bar_plot

ggsave(
  "output/plots/PMBB_v3.CKD.PGS.YK_phenotyping.case_control.sex_distribution.png",
  plot = sex_by_ckd_bar_plot
)

## ancestry

In [None]:
# Count and percentage of unique individuals per MostSimilarPop label.
all_score %>%
  distinct(IID, MostSimilarPop) %>%
  group_by(MostSimilarPop) %>%
  summarize(Count = n()) %>%
  mutate(Percent = Count / sum(Count) * 100)

In [None]:
# Bar chart of unique individuals per population label (column shown as
# ANCESTRY), saved as a PNG.
ancestry_bar_plot <- all_score %>%
  distinct(IID, MostSimilarPop) %>%
  rename(ANCESTRY = MostSimilarPop) %>%
  ggplot(aes(x = ANCESTRY, fill = ANCESTRY)) +
  geom_bar() +
  labs(title = "PMBB v3 CKD PGS Ancestry Distribution")
ancestry_bar_plot

ggsave(
  "output/plots/PMBB_v3.CKD.PGS.YK_phenotyping.all_samples.ancestry_distribution.png",
  plot = ancestry_bar_plot
)

In [None]:
# Count per MostSimilarPop x CKD cell. The result stays grouped by
# MostSimilarPop, so Percent is the share of each CKD level within a population.
all_score %>%
  distinct(IID, CKD, MostSimilarPop) %>%
  group_by(MostSimilarPop, CKD) %>%
  summarize(Count = n()) %>%
  mutate(Percent = Count / sum(Count) * 100)

In [None]:
# Bar chart of unique individuals per population label (shown as ANCESTRY) with
# bars split by CKD level, saved as a PNG.
ancestry_by_ckd_bar_plot <- all_score %>%
  distinct(IID, MostSimilarPop, CKD) %>%
  rename(ANCESTRY = MostSimilarPop) %>%
  ggplot(aes(x = ANCESTRY, fill = CKD)) +
  geom_bar() +
  labs(title = "PMBB v3 CKD PGS Ancestry Distribution")
ancestry_by_ckd_bar_plot

ggsave(
  "output/plots/PMBB_v3.CKD.PGS.YK_phenotyping.case_control.ancestry_distribution.png",
  plot = ancestry_by_ckd_bar_plot
)

## batch

In [None]:
# Count and percentage of unique individuals per BATCH code.
all_score %>%
  distinct(IID, BATCH) %>%
  group_by(BATCH) %>%
  summarize(Count = n()) %>%
  mutate(Percent = Count / sum(Count) * 100)

In [None]:
# Bar chart of BATCH over unique individuals, saved as a PNG.
# Code 2 is relabelled "v3" before code 1 becomes "v2": doing it the other way
# round would rewrite the "2" inside the freshly inserted "v2".
batch_bar_plot <- all_score %>%
  distinct(IID, BATCH) %>%
  mutate(BATCH = gsub("1", "v2", gsub("2", "v3", BATCH))) %>%
  ggplot(aes(x = BATCH, fill = BATCH)) +
  geom_bar() +
  labs(title = "PMBB v3 CKD PGS Batch Distribution")
batch_bar_plot

ggsave(
  "output/plots/PMBB_v3.CKD.PGS.YK_phenotyping.all_samples.batch_distribution.png",
  plot = batch_bar_plot
)

In [None]:
# Count per CKD x BATCH cell. The result stays grouped by CKD, so Percent is
# the share of each BATCH within a CKD level.
all_score %>%
  distinct(IID, BATCH, CKD) %>%
  group_by(CKD, BATCH) %>%
  summarize(Count = n()) %>%
  mutate(Percent = Count / sum(Count) * 100)

In [None]:
# Bar chart of BATCH with bars split by CKD level, saved as a PNG.
# Code 2 is relabelled "v3" before code 1 becomes "v2": doing it the other way
# round would rewrite the "2" inside the freshly inserted "v2".
batch_by_ckd_bar_plot <- all_score %>%
  distinct(IID, BATCH, CKD) %>%
  mutate(BATCH = gsub("1", "v2", gsub("2", "v3", BATCH))) %>%
  ggplot(aes(x = BATCH, fill = CKD)) +
  geom_bar() +
  labs(title = "PMBB v3 CKD PGS Batch Distribution")
batch_by_ckd_bar_plot

ggsave(
  "output/plots/PMBB_v3.CKD.PGS.YK_phenotyping.case_control.batch_distribution.png",
  plot = batch_by_ckd_bar_plot
)

# logistic regressions

## create score list

In [None]:
# Every distinct PGS label present in the merged table.
score_list <- unique(all_score[["PGS"]])
length(score_list)
score_list

## run LR on all scores for all individuals (Znorm2 + age + sex + batch)

In [None]:
# Fit one logistic regression per score in `score_list` on all individuals
# (CKD ~ Z_norm2 + AGE + SEX + BATCH) and collect, keyed by score:
#   all_tidy_znorm2_list   - per-term table from broom::tidy(); estimates and
#                            confidence limits are exponentiated (odds ratios)
#   all_glance_znorm2_list - one-row model summary from broom::glance()
all_tidy_znorm2_list <- list()
all_glance_znorm2_list <- list()

for (i in seq_along(score_list)) {
  # `score_id` (not `score`) so the `score` table loaded earlier is not clobbered
  score_id <- score_list[[i]]
  message(paste0('starting ', score_id, ' at index ', i))

  # Exact match: grepl() would treat the ID as an unanchored regex ('.' matches
  # any character, substrings match) and could pull in rows of other scores.
  one_score <- all_score %>%
    filter(PGS == score_id)

  # Named `fit` so the stats::glm function is not shadowed by its own result
  fit <- glm(
    CKD ~ Z_norm2 + AGE + SEX + BATCH,
    data = one_score,
    family = "binomial"
  )

  # tidy(): term, estimate, std.error, statistic, p.value, conf.low, conf.high
  all_tidy_znorm2_list[[score_id]] <-
    data.frame(broom::tidy(fit, exponentiate = TRUE, conf.int = TRUE)) %>%
    mutate(SCORE = score_id)

  # glance(): null.deviance, df.null, logLik, AIC, BIC, deviance, df.residual, nobs
  all_glance_znorm2_list[[score_id]] <-
    data.frame(broom::glance(fit)) %>%
    mutate(SCORE = score_id)
}

In [None]:
# Stack the per-score result tables into one data frame each.
all_tidy_znorm2 <- dplyr::bind_rows(all_tidy_znorm2_list)
all_glance_znorm2 <- dplyr::bind_rows(all_glance_znorm2_list)

## run LR on EUR only (Znorm2 + age + sex + batch)

In [None]:
# Fit one logistic regression per score in `score_list` on the EUR subset
# (CKD ~ Z_norm2 + AGE + SEX + BATCH) and collect, keyed by score:
#   all_tidy_eur_znorm2_list   - per-term table from broom::tidy(); estimates
#                                and confidence limits are exponentiated (ORs)
#   all_glance_eur_znorm2_list - one-row model summary from broom::glance()
all_tidy_eur_znorm2_list <- list()
all_glance_eur_znorm2_list <- list()

for (i in seq_along(score_list)) {
  # `score_id` (not `score`) so the `score` table loaded earlier is not clobbered
  score_id <- score_list[[i]]
  message(paste0('starting ', score_id, ' at index ', i))

  # Exact match: grepl() would treat the ID as an unanchored regex ('.' matches
  # any character, substrings match) and could pull in rows of other scores.
  one_score <- all_score_eur %>%
    filter(PGS == score_id)

  # Named `fit` so the stats::glm function is not shadowed by its own result
  fit <- glm(
    CKD ~ Z_norm2 + AGE + SEX + BATCH,
    data = one_score,
    family = "binomial"
  )

  # tidy(): term, estimate, std.error, statistic, p.value, conf.low, conf.high
  all_tidy_eur_znorm2_list[[score_id]] <-
    data.frame(broom::tidy(fit, exponentiate = TRUE, conf.int = TRUE)) %>%
    mutate(SCORE = score_id)

  # glance(): null.deviance, df.null, logLik, AIC, BIC, deviance, df.residual, nobs
  all_glance_eur_znorm2_list[[score_id]] <-
    data.frame(broom::glance(fit)) %>%
    mutate(SCORE = score_id)
}

In [None]:
# Stack the per-score EUR result tables into one data frame each.
all_tidy_eur_znorm2 <- dplyr::bind_rows(all_tidy_eur_znorm2_list)
all_glance_eur_znorm2 <- dplyr::bind_rows(all_glance_eur_znorm2_list)

## run LR on AFR only (Znorm2 + age + sex + batch)

In [None]:
# Fit one logistic regression per score in `score_list` on the AFR subset
# (CKD ~ Z_norm2 + AGE + SEX + BATCH) and collect, keyed by score:
#   all_tidy_afr_znorm2_list   - per-term table from broom::tidy(); estimates
#                                and confidence limits are exponentiated (ORs)
#   all_glance_afr_znorm2_list - one-row model summary from broom::glance()
all_tidy_afr_znorm2_list <- list()
all_glance_afr_znorm2_list <- list()

for (i in seq_along(score_list)) {
  # `score_id` (not `score`) so the `score` table loaded earlier is not clobbered
  score_id <- score_list[[i]]
  message(paste0('starting ', score_id, ' at index ', i))

  # Exact match: grepl() would treat the ID as an unanchored regex ('.' matches
  # any character, substrings match) and could pull in rows of other scores.
  one_score <- all_score_afr %>%
    filter(PGS == score_id)

  # Named `fit` so the stats::glm function is not shadowed by its own result
  fit <- glm(
    CKD ~ Z_norm2 + AGE + SEX + BATCH,
    data = one_score,
    family = "binomial"
  )

  # tidy(): term, estimate, std.error, statistic, p.value, conf.low, conf.high
  all_tidy_afr_znorm2_list[[score_id]] <-
    data.frame(broom::tidy(fit, exponentiate = TRUE, conf.int = TRUE)) %>%
    mutate(SCORE = score_id)

  # glance(): null.deviance, df.null, logLik, AIC, BIC, deviance, df.residual, nobs
  all_glance_afr_znorm2_list[[score_id]] <-
    data.frame(broom::glance(fit)) %>%
    mutate(SCORE = score_id)
}

In [None]:
# Stack the per-score AFR result tables into one data frame each.
all_tidy_afr_znorm2 <- dplyr::bind_rows(all_tidy_afr_znorm2_list)
all_glance_afr_znorm2 <- dplyr::bind_rows(all_glance_afr_znorm2_list)

## filter tidy df to only scores, add CI column, and remove score suffix

In [None]:
# Keep only the Z_norm2 term of each model and add a "(low, high)" text column
# built from the confidence limits.
all_tidy_znorm2_score <- all_tidy_znorm2 %>%
  filter(term == "Z_norm2") %>%
  mutate(CI = paste0("(", paste(conf.low, conf.high, sep = ", "), ")"))
summary(all_tidy_znorm2_score[["p.value"]])
summary(all_tidy_znorm2_score[["estimate"]])

In [None]:
# Keep only the Z_norm2 term of each EUR model and add a "(low, high)" text
# column built from the confidence limits.
all_tidy_eur_znorm2_score <- all_tidy_eur_znorm2 %>%
  filter(term == "Z_norm2") %>%
  mutate(CI = paste0("(", paste(conf.low, conf.high, sep = ", "), ")"))
summary(all_tidy_eur_znorm2_score[["p.value"]])
summary(all_tidy_eur_znorm2_score[["estimate"]])

In [None]:
# Keep only the Z_norm2 term of each AFR model and add a "(low, high)" text
# column built from the confidence limits.
all_tidy_afr_znorm2_score <- all_tidy_afr_znorm2 %>%
  filter(term == "Z_norm2") %>%
  mutate(CI = paste0("(", paste(conf.low, conf.high, sep = ", "), ")"))
summary(all_tidy_afr_znorm2_score[["p.value"]])
summary(all_tidy_afr_znorm2_score[["estimate"]])

## export

In [None]:
# Export the all-individuals Z_norm2 term table as tab-separated text with a
# header row, no row names and no quoting.
write.table(
  all_tidy_znorm2_score,
  file = "output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Export the all-individuals model summaries as tab-separated text with a
# header row, no row names and no quoting.
write.table(
  all_glance_znorm2,
  file = "output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.glm.raw.glance.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Export the EUR Z_norm2 term table as tab-separated text with a header row,
# no row names and no quoting.
write.table(
  all_tidy_eur_znorm2_score,
  file = "output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Export the EUR model summaries as tab-separated text with a header row, no
# row names and no quoting.
write.table(
  all_glance_eur_znorm2,
  file = "output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.glm.raw.glance.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Export the AFR Z_norm2 term table as tab-separated text with a header row,
# no row names and no quoting.
write.table(
  all_tidy_afr_znorm2_score,
  file = "output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Export the AFR model summaries as tab-separated text with a header row, no
# row names and no quoting.
write.table(
  all_glance_afr_znorm2,
  file = "output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.glm.raw.glance.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

# assess effect size

## create forest plots

In [None]:
# Forest plot for all individuals: one point per score at its Z_norm2 odds
# ratio with a horizontal bar from conf.low to conf.high; the dashed vertical
# line marks OR = 1. Saved as a PNG.
forest_all_plot <- all_tidy_znorm2_score %>%
  ggplot(aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "#4C4E52", linewidth = 0.75) +
  labs(
    x = "Odds Ratio (OR)",
    y = "Score in Model",
    title = "PMBB v3 ALL CKD PGS Z_norm2 Odds Ratio"
  ) +
  theme_minimal() +
  theme(legend.position = "top", text = element_text(size = 10))
forest_all_plot
ggsave(
  "output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.glm.odds_ratio.forestplot.png",
  plot = forest_all_plot
)

In [None]:
# Forest plot for the EUR subset: one point per score at its Z_norm2 odds
# ratio with a horizontal bar from conf.low to conf.high; the dashed vertical
# line marks OR = 1. Saved as a PNG.
forest_eur_plot <- all_tidy_eur_znorm2_score %>%
  ggplot(aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "#4C4E52", linewidth = 0.75) +
  labs(
    x = "Odds Ratio (OR)",
    y = "Score in Model",
    title = "PMBB v3 EUR CKD PGS Z_norm2 Odds Ratio"
  ) +
  theme_minimal() +
  theme(legend.position = "top", text = element_text(size = 10))
forest_eur_plot
ggsave(
  "output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.glm.odds_ratio.forestplot.png",
  plot = forest_eur_plot
)

In [None]:
# Forest plot for the AFR subset: one point per score at its Z_norm2 odds
# ratio with a horizontal bar from conf.low to conf.high; the dashed vertical
# line marks OR = 1. Saved as a PNG.
forest_afr_plot <- all_tidy_afr_znorm2_score %>%
  ggplot(aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "#4C4E52", linewidth = 0.75) +
  labs(
    x = "Odds Ratio (OR)",
    y = "Score in Model",
    title = "PMBB v3 AFR CKD PGS Z_norm2 Odds Ratio"
  ) +
  theme_minimal() +
  theme(legend.position = "top", text = element_text(size = 10))
forest_afr_plot
ggsave(
  "output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.glm.odds_ratio.forestplot.png",
  plot = forest_afr_plot
)

## assess significance

### check significance

In [None]:
# Total number of scores, how many are significant (p < 0.05) with an odds
# ratio above 1, and the rows for the rest.
# `estimate` is the odds ratio (the models were tidied with exponentiate =
# TRUE); `statistic` is the test statistic, so comparing it with 1 only
# matched the OR > 1 intent by coincidence. The second filter is the exact
# complement of the first, so boundary rows (p == 0.05 or OR == 1) are listed
# rather than silently missing from both sets.
nrow(all_tidy_znorm2_score)
all_tidy_znorm2_score %>%
  filter(p.value < 0.05 & estimate > 1) %>%
  nrow()
all_tidy_znorm2_score %>%
  filter(p.value >= 0.05 | estimate <= 1)

In [None]:
# EUR: total number of scores, how many are significant (p < 0.05) with an
# odds ratio above 1, and the rows for the rest.
# `estimate` is the odds ratio (the models were tidied with exponentiate =
# TRUE); `statistic` is the test statistic, so comparing it with 1 only
# matched the OR > 1 intent by coincidence. The second filter is the exact
# complement of the first, so boundary rows (p == 0.05 or OR == 1) are listed
# rather than silently missing from both sets.
nrow(all_tidy_eur_znorm2_score)
all_tidy_eur_znorm2_score %>%
  filter(p.value < 0.05 & estimate > 1) %>%
  nrow()
all_tidy_eur_znorm2_score %>%
  filter(p.value >= 0.05 | estimate <= 1)

In [None]:
# AFR: total number of scores, how many are significant (p < 0.05) with an
# odds ratio above 1, and the rows for the rest.
# `estimate` is the odds ratio (the models were tidied with exponentiate =
# TRUE); `statistic` is the test statistic, so comparing it with 1 only
# matched the OR > 1 intent by coincidence. The second filter is the exact
# complement of the first, so boundary rows (p == 0.05 or OR == 1) are listed
# rather than silently missing from both sets.
nrow(all_tidy_afr_znorm2_score)
all_tidy_afr_znorm2_score %>%
  filter(p.value < 0.05 & estimate > 1) %>%
  nrow()
all_tidy_afr_znorm2_score %>%
  filter(p.value >= 0.05 | estimate <= 1)

### figure out which ones to keep for afr

In [None]:
# AFR scores that are significant (p < 0.05) with an odds ratio above 1.
# The filter uses `estimate` (the exponentiated coefficient, i.e. the OR)
# rather than `statistic`, which is the test statistic and not an odds ratio.
all_tidy_afr_znorm2_score %>%
  filter(p.value < 0.05, estimate > 1) %>%
  select(SCORE) %>%
  unique()

# compute AUROC & brier score

## make score list

In [None]:
# All distinct PGS labels except the six listed below, in their original order.
score_list <- base::setdiff(
  unique(all_score$PGS),
  c(
    "PGS002757",
    "PGS005090",
    "EUR.eGFR.PRScsx",
    "EAS.eGFR.PRScsx",
    "AMR.eGFR.PRScsx",
    "AFR.eGFR.PRScsx"
  )
)
length(score_list)
print(score_list)
class(score_list)

In [None]:
# All distinct PGS labels except the eleven listed below, in their original
# order.
score_list_multiancestry <- base::setdiff(
  unique(all_score$PGS),
  c(
    "PGS002757",
    "PGS005090",
    "EAS.eGFR.PRScsx",
    "AMR.eGFR.PRScsx",
    "AFR.eGFR.PRScsx",
    "EUR.eGFR.PRScsx",
    "EAS.eGFR.flip.PRScsx",
    "AMR.eGFR.flip.PRScsx",
    "AFR.eGFR.flip.PRScsx",
    "AFR.Phe_585.3.PRScsx",
    "EAS.Phe_585.3.PRScsx"
  )
)
length(score_list_multiancestry)
print(score_list_multiancestry)
class(score_list_multiancestry)

In [None]:
# EUR score list: all distinct PGS labels except the six listed below, in their
# original order.
score_list_eur <- base::setdiff(
  unique(all_score$PGS),
  c(
    "PGS002757",
    "PGS005090",
    "EUR.eGFR.PRScsx",
    "EAS.eGFR.PRScsx",
    "AMR.eGFR.PRScsx",
    "AFR.eGFR.PRScsx"
  )
)
length(score_list_eur)
print(score_list_eur)
class(score_list_eur)

In [None]:
# EUR multi-ancestry score list: all distinct PGS labels except the eleven
# listed below, in their original order.
score_list_eur_multiancestry <- base::setdiff(
  unique(all_score$PGS),
  c(
    "PGS002757",
    "PGS005090",
    "EAS.eGFR.PRScsx",
    "AMR.eGFR.PRScsx",
    "AFR.eGFR.PRScsx",
    "EUR.eGFR.PRScsx",
    "EAS.eGFR.flip.PRScsx",
    "AMR.eGFR.flip.PRScsx",
    "AFR.eGFR.flip.PRScsx",
    "AFR.Phe_585.3.PRScsx",
    "EAS.Phe_585.3.PRScsx"
  )
)
length(score_list_eur_multiancestry)
print(score_list_eur_multiancestry)
class(score_list_eur_multiancestry)

In [None]:
# Hand-picked scores evaluated in the AFR subset: the PRScsx-derived labels
# first, then the PGS-prefixed IDs.
score_list_afr <- local({
  prscsx_labels <- c(
    "AFR.Phe_585.3.PRScsx",
    "AFR.eGFR.flip.PRScsx",
    "AMR.eGFR.flip.PRScsx",
    "EAS.Phe_585.3.PRScsx",
    "EAS.eGFR.flip.PRScsx",
    "EUR.Phe_585.3.PRScsx",
    "EUR.eGFR.flip.PRScsx"
  )
  pgs_ids <- c(
    "PGS000728", "PGS002237", "PGS003988", "PGS004004",
    "PGS004016", "PGS004030", "PGS004045", "PGS004058",
    "PGS004074", "PGS004088", "PGS004101", "PGS004128",
    "PGS004142", "PGS004158", "PGS004889", "PGS005113"
  )
  c(prscsx_labels, pgs_ids)
})
length(score_list_afr)
score_list_afr

In [None]:
# Hand-picked scores for the AFR multi-ancestry runs: the two EUR PRScsx labels
# first, then the PGS-prefixed IDs.
score_list_afr_multiancestry <- local({
  prscsx_labels <- c(
    "EUR.Phe_585.3.PRScsx",
    "EUR.eGFR.flip.PRScsx"
  )
  pgs_ids <- c(
    "PGS000728", "PGS002237", "PGS003988", "PGS004004",
    "PGS004016", "PGS004030", "PGS004045", "PGS004058",
    "PGS004074", "PGS004088", "PGS004101", "PGS004128",
    "PGS004142", "PGS004158", "PGS004889", "PGS005113"
  )
  c(prscsx_labels, pgs_ids)
})
length(score_list_afr_multiancestry)
score_list_afr_multiancestry

## run models on all individuals and make discrimination & calibration plots

In [None]:
# Repeated 10-fold cross-validation (6 repeats) of a logistic regression
# CKD ~ Z_norm2 + AGE + SEX + BATCH on all individuals, one run per score in
# `score_list`. Results are stored keyed by score:
#   model_metrics_dfs - collect_metrics() output plus a `predictor` column
#   workflow_dfs      - the resampling result wrapped as a workflow set
#   roc_plot_list     - ROC curve data plus a `predictor` column
#   cal_plot_list     - calibration plot (ggplot object)

# set glm model
message('setting parameters')
glm_model <- logistic_reg() %>%
  set_engine("glm")

# define metrics of interest as brier score & auroc
cls_met <- metric_set(roc_auc, brier_class)
# keep out-of-fold predictions and the workflow with each resampling result
control <- control_resamples(save_pred = TRUE, save_workflow = TRUE)

# containers, one entry per score
model_metrics_dfs <- list()
workflow_dfs <- list()
roc_plot_list <- list()
cal_plot_list <- list()

# loop through scores
message('starting for loop')
for (i in seq_along(score_list)) {
  # `score_id` (not `score`) so the `score` table loaded earlier is not clobbered
  score_id <- score_list[[i]]
  message(paste0('starting ', score_id, ' at index ', i))

  # reset the seed so every score's folds are drawn from the same RNG state
  set.seed(7)

  # Exact match: grepl() would treat the ID as an unanchored regex ('.' matches
  # any character, substrings match) and could pull in rows of other scores.
  scores_vfold <- all_score %>%
    filter(PGS == score_id)

  # set number of folds and repeats
  folds_x <- vfold_cv(scores_vfold, v = 10, repeats = 6, strata = NULL, pool = 0.1)

  # build workflow
  # Remove Age and Sex if looking to test null model with just outcome ~ PGS
  glm_workflow <- workflow() %>%
    add_model(glm_model) %>%
    add_variables(outcome = CKD, predictors = c(Z_norm2, AGE, SEX, BATCH))

  # run model with cross validation
  glm_fit_all <- fit_resamples(glm_workflow, folds_x, metrics = cls_met, control = control)

  # collect metrics
  model_metrics_dfs[[score_id]] <- data.frame(collect_metrics(glm_fit_all)) %>%
    dplyr::mutate(predictor = score_id)

  # collect predictions
  preds <- collect_predictions(glm_fit_all)

  # collect workflow (own name: this is a workflow set, not the workflow above)
  wf_set <- as_workflow_set(!!score_id := glm_fit_all)
  workflow_dfs[[score_id]] <- wf_set

  # make AUROC plot data
  roc_plot_list[[score_id]] <- preds %>%
    roc_curve(CKD, .pred_1, event_level = "second") %>%
    dplyr::mutate(predictor = score_id)

  # make calibration plot
  cal_plot_list[[score_id]] <- preds %>%
    cal_plot_logistic(CKD, .pred_0, smooth = FALSE, conf_level = 0.95, include_rug = FALSE) +
    ggtitle(paste0('PMBB v3 ALL CKD ', score_id, ' Calibration'))
}

## run models on all individuals with multiancestry PGS weights

In [None]:
# Repeated 10-fold cross-validation (6 repeats) of a logistic regression
# CKD ~ Z_norm2 + AGE + SEX + BATCH on all individuals, one run per score in
# `score_list_multiancestry`. Each resampling result is wrapped as a workflow
# set and stored in `workflow_multiancestry_score_dfs`, keyed by score.

# set glm model
message('setting parameters')
glm_model <- logistic_reg() %>%
  set_engine("glm")

# define metrics of interest as brier score & auroc
# (the earlier comment also mentioned logloss, which this metric set does not
# compute)
cls_met <- metric_set(roc_auc, brier_class)
# keep out-of-fold predictions and the workflow with each resampling result
control <- control_resamples(save_pred = TRUE, save_workflow = TRUE)

# container, one entry per score
workflow_multiancestry_score_dfs <- list()

# loop through scores
message('starting for loop')
for (i in seq_along(score_list_multiancestry)) {
  # `score_id` (not `score`) so the `score` table loaded earlier is not clobbered
  score_id <- score_list_multiancestry[[i]]
  message(paste0('starting ', score_id, ' at index ', i))

  # reset the seed so every score's folds are drawn from the same RNG state
  set.seed(7)

  # Exact match: grepl() would treat the ID as an unanchored regex ('.' matches
  # any character, substrings match) and could pull in rows of other scores.
  scores_vfold <- all_score %>%
    filter(PGS == score_id)

  # set number of folds and repeats
  folds_x <- vfold_cv(scores_vfold, v = 10, repeats = 6, strata = NULL, pool = 0.1)

  # build workflow
  # Remove Age and Sex if looking to test null model with just outcome ~ PGS
  glm_workflow <- workflow() %>%
    add_model(glm_model) %>%
    add_variables(outcome = CKD, predictors = c(Z_norm2, AGE, SEX, BATCH))

  # run model with cross validation
  glm_fit_all <- fit_resamples(glm_workflow, folds_x, metrics = cls_met, control = control)

  # collect workflow (own name: this is a workflow set, not the workflow above)
  wf_set <- as_workflow_set(!!score_id := glm_fit_all)
  workflow_multiancestry_score_dfs[[score_id]] <- wf_set
}

## run models on EUR individuals and make discrimination/calibration plots

In [None]:
# Repeated 10-fold cross-validation (6 repeats) of a logistic regression
# CKD ~ Z_norm2 + AGE + SEX + BATCH on the EUR subset, one run per score in
# `score_list_eur`. Results are stored keyed by score:
#   model_metrics_eur_dfs - collect_metrics() output plus a `predictor` column
#   workflow_eur_dfs      - the resampling result wrapped as a workflow set
#   roc_plot_list_eur     - ROC curve data plus a `predictor` column
#   cal_plot_list_eur     - calibration plot (ggplot object)

# set glm model
message('setting parameters')
glm_model <- logistic_reg() %>%
  set_engine("glm")

# define metrics of interest as brier score & auroc
cls_met <- metric_set(roc_auc, brier_class)
# keep out-of-fold predictions and the workflow with each resampling result
control <- control_resamples(save_pred = TRUE, save_workflow = TRUE)

# containers, one entry per score
model_metrics_eur_dfs <- list()
workflow_eur_dfs <- list()
roc_plot_list_eur <- list()
cal_plot_list_eur <- list()

# loop through scores
message('starting for loop')
for (i in seq_along(score_list_eur)) {
  # `score_id` (not `score`) so the `score` table loaded earlier is not clobbered
  score_id <- score_list_eur[[i]]
  message(paste0('starting ', score_id, ' at index ', i))

  # reset the seed so every score's folds are drawn from the same RNG state
  set.seed(7)

  # Exact match: grepl() would treat the ID as an unanchored regex ('.' matches
  # any character, substrings match) and could pull in rows of other scores.
  scores_vfold <- all_score_eur %>%
    filter(PGS == score_id)

  # set number of folds and repeats
  folds_x <- vfold_cv(scores_vfold, v = 10, repeats = 6, strata = NULL, pool = 0.1)

  # build workflow
  # Remove Age and Sex if looking to test null model with just outcome ~ PGS
  glm_workflow <- workflow() %>%
    add_model(glm_model) %>%
    add_variables(outcome = CKD, predictors = c(Z_norm2, AGE, SEX, BATCH))

  # run model with cross validation
  glm_fit_all <- fit_resamples(glm_workflow, folds_x, metrics = cls_met, control = control)

  # collect metrics
  model_metrics_eur_dfs[[score_id]] <- data.frame(collect_metrics(glm_fit_all)) %>%
    dplyr::mutate(predictor = score_id)

  # collect predictions
  preds <- collect_predictions(glm_fit_all)

  # collect workflow (own name: this is a workflow set, not the workflow above)
  wf_set <- as_workflow_set(!!score_id := glm_fit_all)
  workflow_eur_dfs[[score_id]] <- wf_set

  # make AUROC plot data
  roc_plot_list_eur[[score_id]] <- preds %>%
    roc_curve(CKD, .pred_1, event_level = "second") %>%
    dplyr::mutate(predictor = score_id)

  # make calibration plot
  cal_plot_list_eur[[score_id]] <- preds %>%
    cal_plot_logistic(CKD, .pred_0, smooth = FALSE, conf_level = 0.95, include_rug = FALSE) +
    ggtitle(paste('PMBB v3 EUR CKD', score_id, 'Calibration'))
}

## run models in EUR individuals with multiancestry PGS weights

In [None]:
# Repeated 10-fold cross-validation (6 repeats) of a logistic regression
# CKD ~ Z_norm2 + AGE + SEX + BATCH on the EUR subset, one run per score in
# `score_list_eur_multiancestry`. Each resampling result is wrapped as a
# workflow set and stored in `workflow_eur_multiancestry_score_dfs`, keyed by
# score.

# set glm model
message('setting parameters')
glm_model <- logistic_reg() %>%
  set_engine("glm")

# define metrics of interest as brier score, auroc & logloss
cls_met <- metric_set(roc_auc, brier_class, mn_log_loss)
# keep out-of-fold predictions and the workflow with each resampling result
control <- control_resamples(save_pred = TRUE, save_workflow = TRUE)

# container, one entry per score
workflow_eur_multiancestry_score_dfs <- list()

# loop through scores
message('starting for loop')
for (i in seq_along(score_list_eur_multiancestry)) {
  # `score_id` (not `score`) so the `score` table loaded earlier is not clobbered
  score_id <- score_list_eur_multiancestry[[i]]
  message(paste0('starting ', score_id, ' at index ', i))

  # reset the seed so every score's folds are drawn from the same RNG state
  set.seed(7)

  # Exact match: grepl() would treat the ID as an unanchored regex ('.' matches
  # any character, substrings match) and could pull in rows of other scores.
  scores_vfold <- all_score_eur %>%
    filter(PGS == score_id)

  # set number of folds and repeats
  folds_x <- vfold_cv(scores_vfold, v = 10, repeats = 6, strata = NULL, pool = 0.1)

  # build workflow
  # Remove Age and Sex if looking to test null model with just outcome ~ PGS
  glm_workflow <- workflow() %>%
    add_model(glm_model) %>%
    add_variables(outcome = CKD, predictors = c(Z_norm2, AGE, SEX, BATCH))

  # run model with cross validation
  glm_fit_all <- fit_resamples(glm_workflow, folds_x, metrics = cls_met, control = control)

  # collect workflow (own name: this is a workflow set, not the workflow above)
  wf_set <- as_workflow_set(!!score_id := glm_fit_all)
  workflow_eur_multiancestry_score_dfs[[score_id]] <- wf_set
}


## run models on AFR individuals and make discrimination/calibration plots

In [None]:
# Repeated 10-fold cross-validation (6 repeats) of a logistic regression
# CKD ~ Z_norm2 + AGE + SEX + BATCH on the AFR subset, one run per score in
# `score_list_afr`. Results are stored keyed by score:
#   model_metrics_afr_dfs - collect_metrics() output plus a `predictor` column
#   workflow_afr_dfs      - the resampling result wrapped as a workflow set
#   roc_plot_list_afr     - ROC curve data plus a `predictor` column
#   cal_plot_list_afr     - calibration plot (ggplot object)

# set glm model
message('setting parameters')
glm_model <- logistic_reg() %>%
  set_engine("glm")

# define metrics of interest as brier score & auroc
cls_met <- metric_set(roc_auc, brier_class)
# keep out-of-fold predictions and the workflow with each resampling result
control <- control_resamples(save_pred = TRUE, save_workflow = TRUE)

# containers, one entry per score
model_metrics_afr_dfs <- list()
workflow_afr_dfs <- list()
roc_plot_list_afr <- list()
cal_plot_list_afr <- list()

# loop through scores
message('starting for loop')
for (i in seq_along(score_list_afr)) {
  # `score_id` (not `score`) so the `score` table loaded earlier is not clobbered
  score_id <- score_list_afr[[i]]
  message(paste0('starting ', score_id, ' at index ', i))

  # reset the seed so every score's folds are drawn from the same RNG state
  set.seed(7)

  # Exact match: grepl() would treat the ID as an unanchored regex ('.' matches
  # any character, substrings match) and could pull in rows of other scores.
  scores_vfold <- all_score_afr %>%
    filter(PGS == score_id)

  # set number of folds and repeats
  folds_x <- vfold_cv(scores_vfold, v = 10, repeats = 6, strata = NULL, pool = 0.1)

  # build workflow
  # Remove Age and Sex if looking to test null model with just outcome ~ PGS
  glm_workflow <- workflow() %>%
    add_model(glm_model) %>%
    add_variables(outcome = CKD, predictors = c(Z_norm2, AGE, SEX, BATCH))

  # run model with cross validation
  glm_fit_all <- fit_resamples(glm_workflow, folds_x, metrics = cls_met, control = control)

  # collect metrics
  model_metrics_afr_dfs[[score_id]] <- data.frame(collect_metrics(glm_fit_all)) %>%
    dplyr::mutate(predictor = score_id)

  # collect predictions
  preds <- collect_predictions(glm_fit_all)

  # collect workflow (own name: this is a workflow set, not the workflow above)
  wf_set <- as_workflow_set(!!score_id := glm_fit_all)
  workflow_afr_dfs[[score_id]] <- wf_set

  # make AUROC plot data
  roc_plot_list_afr[[score_id]] <- preds %>%
    roc_curve(CKD, .pred_1, event_level = "second") %>%
    dplyr::mutate(predictor = score_id)

  # make calibration plot
  cal_plot_list_afr[[score_id]] <- preds %>%
    cal_plot_logistic(CKD, .pred_0, smooth = FALSE, conf_level = 0.95, include_rug = FALSE) +
    ggtitle(paste('PMBB v3 AFR CKD', score_id, 'Calibration'))
}


## run model on afr individuals with multiancestry weights

In [None]:
# logistic regression fitted with the glm engine
message('setting parameters')
glm_model <-
    logistic_reg() %>%
    set_engine("glm")

# define metrics of interest as brier score & auroc
cls_met <- metric_set(roc_auc, brier_class)
# keep out-of-fold predictions and the workflow with each resampling result
control <- control_resamples(save_pred = TRUE, save_workflow = TRUE)

# one workflow set per score, keyed by score name
workflow_afr_multiancestry_score_dfs <- list()

# Cross-validate one logistic model per multi-ancestry score in `all_score_afr`
message('starting for loop')
for (score in score_list_afr_multiancestry) {
    index <- which(score_list_afr_multiancestry == score)
    message(paste0('starting ', score, ' at index ', index))

    # reseed on every iteration so vfold_cv() starts from the same RNG state
    # for each score
    set.seed(7)

    # keep the rows whose PGS label contains this score name; fixed = TRUE
    # matches the name literally, so characters such as "." in a score name
    # are not interpreted as regex wildcards
    scores_vfold <- all_score_afr %>%
        filter(grepl(score, PGS, fixed = TRUE))

    # 10 folds, 6 repeats, no stratification
    folds_x <- vfold_cv(scores_vfold, v = 10, repeats = 6, strata = NULL, pool = 0.1)

    # build workflow
    glm_workflow <- workflow() %>%
        add_model(glm_model) %>%
        add_variables(outcome = CKD, predictors = c(Z_norm2, AGE, SEX, BATCH)) # Remove Age and Sex if looking to test null model with just outcome ~ PGS

    # run model with cross validation
    glm_fit_all <- fit_resamples(glm_workflow, folds_x, metrics = cls_met, control = control)

    # wrap the resampling result in a workflow set named after the score
    glm_workflow <- as_workflow_set(!!score := glm_fit_all)
    workflow_afr_multiancestry_score_dfs[[score]] <- glm_workflow
}


## concatenate

### all

In [None]:
# Stack the per-score workflow sets (all individuals) and show the result.
workflows <- bind_rows(workflow_dfs)
nrow(workflows)
print(workflows)

In [None]:
# Long-format metrics for all individuals: one row per predictor and metric,
# keeping only the mean estimate (renamed to `value`).
model_metrics <- model_metrics_dfs %>%
  bind_rows() %>%
  select(predictor, metric = .metric, value = mean)
nrow(model_metrics)
head(model_metrics)

In [None]:
# Stack the per-score ROC curve data (all individuals).
roc_plot_data <- bind_rows(roc_plot_list)
nrow(roc_plot_data)
head(roc_plot_data)

### all individuals multiancestry scores

In [None]:
# Stack the per-score workflow sets (all individuals, multi-ancestry scores).
workflows_multiancestry_score <- bind_rows(workflow_multiancestry_score_dfs)
nrow(workflows_multiancestry_score)
print(workflows_multiancestry_score)

### eur

In [None]:
# Stack the per-score workflow sets (EUR individuals).
workflows_eur <- bind_rows(workflow_eur_dfs)
nrow(workflows_eur)
print(workflows_eur)

In [None]:
# Long-format metrics for EUR individuals: one row per predictor and metric,
# keeping only the mean estimate (renamed to `value`).
model_metrics_eur <- model_metrics_eur_dfs %>%
  bind_rows() %>%
  select(predictor, metric = .metric, value = mean)
nrow(model_metrics_eur)
head(model_metrics_eur)

In [None]:
# Stack the per-score ROC curve data (EUR individuals).
roc_plot_data_eur <- bind_rows(roc_plot_list_eur)
nrow(roc_plot_data_eur)
head(roc_plot_data_eur)

### eur individuals multiancestry scores

In [None]:
# Stack the per-score workflow sets (EUR individuals, multi-ancestry scores).
workflows_eur_multiancestry_score <- bind_rows(workflow_eur_multiancestry_score_dfs)
nrow(workflows_eur_multiancestry_score)
print(workflows_eur_multiancestry_score)

### afr

In [None]:
# Stack the per-score workflow sets (AFR individuals).
workflows_afr <- bind_rows(workflow_afr_dfs)
nrow(workflows_afr)
print(workflows_afr)

In [None]:
# Long-format metrics for AFR individuals: one row per predictor and metric,
# keeping only the mean estimate (renamed to `value`).
model_metrics_afr <- model_metrics_afr_dfs %>%
  bind_rows() %>%
  select(predictor, metric = .metric, value = mean)
nrow(model_metrics_afr)
head(model_metrics_afr)

In [None]:
# Stack the per-score ROC curve data (AFR individuals).
roc_plot_data_afr <- bind_rows(roc_plot_list_afr)
nrow(roc_plot_data_afr)
head(roc_plot_data_afr)

### afr individuals multiancestry scores

In [None]:
# Stack the per-score workflow sets (AFR individuals, multi-ancestry scores).
workflows_afr_multiancestry_score <- bind_rows(workflow_afr_multiancestry_score_dfs)
nrow(workflows_afr_multiancestry_score)
print(workflows_afr_multiancestry_score)

## make/show & export plots

### AUROC

In [None]:
# ROC curves for all individuals, one line per predictor, with a dashed
# reference line from geom_abline().
roc_curve_plot <- ggplot(roc_plot_data, aes(x = 1 - specificity, y = sensitivity, color = predictor)) +
  geom_line(linewidth = 1.2) +
  geom_abline(linetype = "dashed") +
  labs(title = "PMBB v3 ALL CKD PGS ROC Curves", x = "1 - Specificity", y = "Sensitivity") +
  theme_minimal()
roc_curve_plot
# pass the plot explicitly instead of relying on ggsave()'s last_plot() default
ggsave("output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.roc_curve.png",
       plot = roc_curve_plot)

In [None]:
# ROC curves for EUR individuals, one line per predictor, with a dashed
# reference line from geom_abline().
roc_curve_plot <- ggplot(roc_plot_data_eur, aes(x = 1 - specificity, y = sensitivity, color = predictor)) +
  geom_line(linewidth = 1.2) +
  geom_abline(linetype = "dashed") +
  labs(title = "PMBB v3 EUR CKD PGS ROC Curves", x = "1 - Specificity", y = "Sensitivity") +
  theme_minimal()
roc_curve_plot
# pass the plot explicitly instead of relying on ggsave()'s last_plot() default
ggsave("output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.roc_curve.png",
       plot = roc_curve_plot)

In [None]:
# ROC curves for AFR individuals, one line per predictor, with a dashed
# reference line from geom_abline().
roc_curve_plot <- ggplot(roc_plot_data_afr, aes(x = 1 - specificity, y = sensitivity, color = predictor)) +
  geom_line(linewidth = 1.2) +
  geom_abline(linetype = "dashed") +
  labs(title = "PMBB v3 AFR CKD PGS ROC Curves", x = "1 - Specificity", y = "Sensitivity") +
  theme_minimal()
roc_curve_plot
# pass the plot explicitly instead of relying on ggsave()'s last_plot() default
ggsave("output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.roc_curve.png",
       plot = roc_curve_plot)

### Calibration

#### export as RDS to optimize storage

In [None]:
# Apply drop_vars() to every calibration plot (all individuals) and save the
# resulting list. Assign first, then save: piping straight into saveRDS()
# would store its NULL return value in plot_list_cal.
plot_list_cal <- lapply(cal_plot_list, drop_vars)
saveRDS(plot_list_cal, file = "output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.calibration_plot.rds")

In [None]:
# Apply drop_vars() to every calibration plot (EUR individuals) and save the
# resulting list. Assign first, then save: piping straight into saveRDS()
# would store its NULL return value in plot_list_cal_eur.
plot_list_cal_eur <- lapply(cal_plot_list_eur, drop_vars)
saveRDS(plot_list_cal_eur, file = "output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.calibration_plot.rds")

In [None]:
# Apply drop_vars() to every calibration plot (AFR individuals) and save the
# resulting list. Assign first, then save: piping straight into saveRDS()
# would store its NULL return value in plot_list_cal_afr.
plot_list_cal_afr <- lapply(cal_plot_list_afr, drop_vars)
saveRDS(plot_list_cal_afr, file = "output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.calibration_plot.rds")

#### export one plot

In [None]:
# Show the first calibration plot (all individuals) and save that same object.
# The plot is passed explicitly so the saved image cannot depend on which plot
# ggsave() currently considers the "last" one.
# NOTE(review): the file name hard-codes 'AFR.Phe_585.3'; confirm it matches
# the first entry of cal_plot_list.
cal_plot_list[[1]]
ggsave('output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.AFR.Phe_585.3.calibration_plot.png',
       plot = cal_plot_list[[1]])

In [None]:
# Show the first calibration plot (EUR individuals) and save that same object.
# The plot is passed explicitly so the saved image cannot depend on which plot
# ggsave() currently considers the "last" one.
# NOTE(review): the file name hard-codes 'AFR.Phe_585.3'; confirm it matches
# the first entry of cal_plot_list_eur.
cal_plot_list_eur[[1]]
ggsave('output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.AFR.Phe_585.3.calibration_plot.png',
       plot = cal_plot_list_eur[[1]])

In [None]:
# Show the first calibration plot (AFR individuals) and save that same object.
# The plot is passed explicitly so the saved image cannot depend on which plot
# ggsave() currently considers the "last" one.
# NOTE(review): the file name hard-codes 'AFR.Phe_585.3'; confirm it matches
# the first entry of cal_plot_list_afr.
cal_plot_list_afr[[1]]
ggsave('output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.AFR.Phe_585.3.calibration_plot.png',
       plot = cal_plot_list_afr[[1]])

## make wide form model metrics df

In [None]:
# Wide form (all individuals): one column per metric.
model_metrics_wide <- pivot_wider(
  model_metrics,
  names_from = 'metric',
  values_from = 'value'
)
head(model_metrics_wide)

In [None]:
# Wide form (EUR individuals): one column per metric.
model_metrics_wide_eur <- pivot_wider(
  model_metrics_eur,
  names_from = 'metric',
  values_from = 'value'
)
head(model_metrics_wide_eur)

In [None]:
# Wide form (AFR individuals): one column per metric.
model_metrics_wide_afr <- pivot_wider(
  model_metrics_afr,
  names_from = 'metric',
  values_from = 'value'
)
head(model_metrics_wide_afr)

## export model stats

In [None]:
# Export long-format metrics (all individuals): tab-separated, with header,
# no row names, no quoting. TRUE/FALSE are spelled out because T/F are
# ordinary variables that can be reassigned.
write.table(model_metrics,
            'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.performance_metrics.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

In [None]:
# Export wide-format metrics (all individuals): comma-separated, with header,
# no row names, no quoting. TRUE/FALSE are spelled out because T/F are
# ordinary variables that can be reassigned.
write.table(model_metrics_wide,
            'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.performance_metrics.wide_form.csv',
            sep = ',',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

In [None]:
# Export long-format metrics (EUR individuals): tab-separated, with header,
# no row names, no quoting. TRUE/FALSE are spelled out because T/F are
# ordinary variables that can be reassigned.
write.table(model_metrics_eur,
            'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.performance_metrics.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

In [None]:
# Export wide-format metrics (EUR individuals): comma-separated, with header,
# no row names, no quoting. TRUE/FALSE are spelled out because T/F are
# ordinary variables that can be reassigned.
write.table(model_metrics_wide_eur,
            'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.performance_metrics.wide_form.csv',
            sep = ',',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

In [None]:
# Export long-format metrics (AFR individuals): tab-separated, with header,
# no row names, no quoting. TRUE/FALSE are spelled out because T/F are
# ordinary variables that can be reassigned.
write.table(model_metrics_afr,
            'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.performance_metrics.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

In [None]:
# Export wide-format metrics (AFR individuals): comma-separated, with header,
# no row names, no quoting. TRUE/FALSE are spelled out because T/F are
# ordinary variables that can be reassigned.
write.table(model_metrics_wide_afr,
            'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.performance_metrics.wide_form.csv',
            sep = ',',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# bayesian analysis of resampling statistics

## run models on all individuals

In [None]:
# Fit perf_mod() on the Brier score of all workflows (all individuals), then
# cache the fitted model as RDS.
message('running model')
perf_model_brier <- perf_mod(
  workflows,
  metric = "brier_class",
  seed = 1,
  refresh = 0,
  iter = 10000,
  cores = 8
)
message('saving model')
saveRDS(
  perf_model_brier,
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.brier.rds'
)

In [None]:
# Fit perf_mod() on the ROC AUC of all workflows (all individuals), then
# cache the fitted model as RDS.
message('running model')
perf_model_auc <- perf_mod(
  workflows,
  metric = "roc_auc",
  seed = 1,
  refresh = 0,
  iter = 10000,
  cores = 8
)
message('saving model')
saveRDS(
  perf_model_auc,
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.auc.rds'
)

## run model on all individuals with multi-ancestry scores

In [None]:
# Fit perf_mod() on the Brier score of the multi-ancestry-score workflows
# (all individuals), then cache the fitted model as RDS.
message('running model')
perf_model_brier_multiancestry_score <- perf_mod(
  workflows_multiancestry_score,
  metric = "brier_class",
  seed = 1,
  refresh = 0,
  iter = 10000,
  cores = 8
)
message('saving model')
saveRDS(
  perf_model_brier_multiancestry_score,
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.brier.rds'
)

In [None]:
# Fit perf_mod() on the ROC AUC of the multi-ancestry-score workflows
# (all individuals), then cache the fitted model as RDS.
message('running model')
perf_model_auc_multiancestry_score <- perf_mod(
  workflows_multiancestry_score,
  metric = "roc_auc",
  seed = 1,
  refresh = 0,
  iter = 10000,
  cores = 8
)
message('saving model')
saveRDS(
  perf_model_auc_multiancestry_score,
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.auc.rds'
)

## run models on EUR individuals

In [None]:
# Fit perf_mod() on the Brier score of the EUR workflows, then cache the
# fitted model as RDS.
message('running model')
perf_model_brier_eur <- perf_mod(
  workflows_eur,
  metric = "brier_class",
  seed = 1,
  refresh = 0,
  iter = 10000,
  cores = 8
)
message('saving model')
saveRDS(
  perf_model_brier_eur,
  file = 'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.brier.rds'
)

In [None]:
# Fit perf_mod() on the ROC AUC of the EUR workflows, then cache the fitted
# model as RDS.
message('running model')
perf_model_auc_eur <- perf_mod(
  workflows_eur,
  metric = "roc_auc",
  seed = 1,
  refresh = 0,
  iter = 10000,
  cores = 8
)
message('saving model')
saveRDS(
  perf_model_auc_eur,
  file = 'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.auc.rds'
)

## run models on EUR individuals with multiancestry scores

In [None]:
# Fit perf_mod() on the Brier score of the EUR multi-ancestry-score
# workflows, then cache the fitted model as RDS.
message('running model')
perf_model_brier_eur_multiancestry_score <- perf_mod(
  workflows_eur_multiancestry_score,
  metric = "brier_class",
  seed = 1,
  refresh = 0,
  iter = 10000,
  cores = 8
)
message('saving model')
saveRDS(
  perf_model_brier_eur_multiancestry_score,
  file = 'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.brier.rds'
)

In [None]:
# Fit perf_mod() on the ROC AUC of the EUR multi-ancestry-score workflows,
# then cache the fitted model as RDS.
message('running model')
perf_model_auc_eur_multiancestry_score <- perf_mod(
  workflows_eur_multiancestry_score,
  metric = "roc_auc",
  seed = 1,
  refresh = 0,
  iter = 10000,
  cores = 8
)
message('saving model')
saveRDS(
  perf_model_auc_eur_multiancestry_score,
  file = 'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.auc.rds'
)

## run models on AFR individuals

In [None]:
# Fit perf_mod() on the Brier score of the AFR workflows, then cache the
# fitted model as RDS.
message('running model')
perf_model_brier_afr <- perf_mod(
  workflows_afr,
  metric = "brier_class",
  seed = 1,
  refresh = 0,
  iter = 10000,
  cores = 8
)
message('saving model')
saveRDS(
  perf_model_brier_afr,
  file = 'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.brier.rds'
)

In [None]:
# Fit perf_mod() on the ROC AUC of the AFR workflows, then cache the fitted
# model as RDS.
message('running model')
perf_model_auc_afr <- perf_mod(
  workflows_afr,
  metric = "roc_auc",
  seed = 1,
  refresh = 0,
  iter = 10000,
  cores = 8
)
message('saving model')
saveRDS(
  perf_model_auc_afr,
  file = 'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.auc.rds'
)

## run models on AFR individuals with multiancestry scores

In [None]:
# Fit perf_mod() on the Brier score of the AFR multi-ancestry-score
# workflows, then cache the fitted model as RDS.
message('running model')
perf_model_brier_afr_multiancestry_score <- perf_mod(
  workflows_afr_multiancestry_score,
  metric = "brier_class",
  seed = 1,
  refresh = 0,
  iter = 10000,
  cores = 8
)
message('saving model')
saveRDS(
  perf_model_brier_afr_multiancestry_score,
  file = 'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.brier.rds'
)

In [None]:
# Fit perf_mod() on the ROC AUC of the AFR multi-ancestry-score workflows,
# then cache the fitted model as RDS.
message('running model')
perf_model_auc_afr_multiancestry_score <- perf_mod(
  workflows_afr_multiancestry_score,
  metric = "roc_auc",
  seed = 1,
  refresh = 0,
  iter = 10000,
  cores = 8
)
message('saving model')
saveRDS(
  perf_model_auc_afr_multiancestry_score,
  file = 'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.auc.rds'
)

## process models

### all individuals

#### brier

In [None]:
# Brier (all individuals)
# Reference model = the one with the smallest posterior mean Brier score.
best_brier <- tidy(perf_model_brier, seed = 7) %>%
  summary() %>%
  slice_min(mean) %>%
  pull(model)

# Per-score contrast summaries at ROPE sizes 0.02, 0.01 and 0.005
contrast_models_brier_02 <- list()
contrast_models_brier_01 <- list()
contrast_models_brier_005 <- list()

for (score in score_list) {
  # contrast this score against the reference model
  diff <- contrast_models(
    perf_model_brier,
    list_1 = score,
    list_2 = best_brier,
    seed = 7
  )

  # not read in this cell; kept so the global `model` is unchanged
  model <- score

  contrast_models_brier_02[[score]] <- summary(diff, size = 0.02) %>%
    mutate(model = score) %>%
    rename(pract_equiv_02 = pract_equiv) %>%
    select(model, pract_equiv_02, probability)

  contrast_models_brier_01[[score]] <- summary(diff, size = 0.01) %>%
    mutate(model = score) %>%
    rename(pract_equiv_01 = pract_equiv) %>%
    select(model, pract_equiv_01)

  contrast_models_brier_005[[score]] <- summary(diff, size = 0.005) %>%
    mutate(model = score) %>%
    rename(pract_equiv_005 = pract_equiv) %>%
    select(model, pract_equiv_005)
}

# Per-model 95% interval table joined with the contrast summaries.
# tidy() is given the same seed as the best-model lookup so the table is
# reproducible and agrees with the reference chosen above.
#   CI_95      1 when the model's lower bound is <= the smallest upper bound
#   ROPE_*     1 when pract_equiv at that size is >= 0.95
#   prob_dif   0 when probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, as a string
brier_ci <- perf_model_brier %>%
  broom::tidy(seed = 7) %>%
  summary(prob = 0.95) %>%
  mutate(CI_95 = ifelse(lower <= min(upper), 1, 0)) %>%
  merge(bind_rows(contrast_models_brier_005), by = "model") %>%
  mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_brier_01), by = "model") %>%
  mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_brier_02), by = "model") %>%
  mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
  mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
  mutate(metric = "Brier Score") %>%
  mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
  arrange(desc(mean))

In [None]:
# Show the model picked as the Brier reference (smallest posterior mean).
head(best_brier)

#### auc

In [None]:
# AUC (all individuals)
# Reference model = the one with the largest posterior mean ROC AUC.
best_auc <- tidy(perf_model_auc, seed = 7) %>%
  summary() %>%
  slice_max(mean) %>%
  pull(model)

# Per-score contrast summaries at ROPE sizes 0.02, 0.01 and 0.005
contrast_models_auc_02 <- list()
contrast_models_auc_01 <- list()
contrast_models_auc_005 <- list()

for (score in score_list) {
  # contrast the reference model against this score
  diff <- contrast_models(
    perf_model_auc,
    list_1 = best_auc,
    list_2 = score,
    seed = 7
  )

  # not read in this cell; kept so the global `model` is unchanged
  model <- score

  contrast_models_auc_02[[score]] <- summary(diff, size = 0.02) %>%
    mutate(model = score) %>%
    rename(pract_equiv_02 = pract_equiv) %>%
    select(model, pract_equiv_02, probability)

  contrast_models_auc_01[[score]] <- summary(diff, size = 0.01) %>%
    mutate(model = score) %>%
    rename(pract_equiv_01 = pract_equiv) %>%
    select(model, pract_equiv_01)

  contrast_models_auc_005[[score]] <- summary(diff, size = 0.005) %>%
    mutate(model = score) %>%
    rename(pract_equiv_005 = pract_equiv) %>%
    select(model, pract_equiv_005)
}

# Probability = proportion of the posterior that is > 0 (probability that the positive difference is real)
# Per-model 95% interval table joined with the contrast summaries.
# tidy() is given the same seed as the best-model lookup so the table is
# reproducible and agrees with the reference chosen above.
#   CI_95      1 when the model's upper bound is >= the largest lower bound
#   ROPE_*     1 when pract_equiv at that size is >= 0.95
#   prob_dif   0 when probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, as a string
auc_ci <- perf_model_auc %>%
  broom::tidy(seed = 7) %>%
  summary(prob = 0.95) %>%
  mutate(CI_95 = ifelse(upper >= max(lower), 1, 0)) %>%
  merge(bind_rows(contrast_models_auc_005), by = "model") %>%
  mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_01), by = "model") %>%
  mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_02), by = "model") %>%
  mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
  mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
  mutate(metric = "AU ROC") %>%
  mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
  arrange(desc(mean))

In [None]:
# Show the model picked as the AUC reference (largest posterior mean).
head(best_auc)

### all individuals with multiancestry scores

#### brier

In [None]:
# Brier (all individuals, multi-ancestry scores)
# Reference model = the one with the smallest posterior mean Brier score.
best_brier_multiancestry_score <- tidy(perf_model_brier_multiancestry_score, seed = 7) %>%
  summary() %>%
  slice_min(mean) %>%
  pull(model)

# Per-score contrast summaries at ROPE sizes 0.02, 0.01 and 0.005
contrast_models_brier_02 <- list()
contrast_models_brier_01 <- list()
contrast_models_brier_005 <- list()

for (score in score_list_multiancestry) {
  # contrast this score against the reference model
  diff <- contrast_models(
    perf_model_brier_multiancestry_score,
    list_1 = score,
    list_2 = best_brier_multiancestry_score,
    seed = 7
  )

  # not read in this cell; kept so the global `model` is unchanged
  model <- score

  contrast_models_brier_02[[score]] <- summary(diff, size = 0.02) %>%
    mutate(model = score) %>%
    rename(pract_equiv_02 = pract_equiv) %>%
    select(model, pract_equiv_02, probability)

  contrast_models_brier_01[[score]] <- summary(diff, size = 0.01) %>%
    mutate(model = score) %>%
    rename(pract_equiv_01 = pract_equiv) %>%
    select(model, pract_equiv_01)

  contrast_models_brier_005[[score]] <- summary(diff, size = 0.005) %>%
    mutate(model = score) %>%
    rename(pract_equiv_005 = pract_equiv) %>%
    select(model, pract_equiv_005)
}

# Per-model 95% interval table joined with the contrast summaries.
# tidy() is given the same seed as the best-model lookup so the table is
# reproducible and agrees with the reference chosen above.
#   CI_95      1 when the model's lower bound is <= the smallest upper bound
#   ROPE_*     1 when pract_equiv at that size is >= 0.95
#   prob_dif   0 when probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, as a string
brier_ci_multiancestry_score <- perf_model_brier_multiancestry_score %>%
  broom::tidy(seed = 7) %>%
  summary(prob = 0.95) %>%
  mutate(CI_95 = ifelse(lower <= min(upper), 1, 0)) %>%
  merge(bind_rows(contrast_models_brier_005), by = "model") %>%
  mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_brier_01), by = "model") %>%
  mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_brier_02), by = "model") %>%
  mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
  mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
  mutate(metric = "Brier Score") %>%
  mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
  arrange(desc(mean))

In [None]:
# Show the model picked as the Brier reference (smallest posterior mean).
head(best_brier_multiancestry_score)

#### auc

In [None]:
# AUC (all individuals, multi-ancestry scores)
# Reference model = the one with the largest posterior mean ROC AUC.
best_auc_multiancestry_score <- tidy(perf_model_auc_multiancestry_score, seed = 7) %>%
  summary() %>%
  slice_max(mean) %>%
  pull(model)

# Per-score contrast summaries at ROPE sizes 0.02, 0.01 and 0.005
contrast_models_auc_02 <- list()
contrast_models_auc_01 <- list()
contrast_models_auc_005 <- list()

for (score in score_list_multiancestry) {
  # contrast the reference model against this score
  diff <- contrast_models(
    perf_model_auc_multiancestry_score,
    list_1 = best_auc_multiancestry_score,
    list_2 = score,
    seed = 7
  )

  # not read in this cell; kept so the global `model` is unchanged
  model <- score

  contrast_models_auc_02[[score]] <- summary(diff, size = 0.02) %>%
    mutate(model = score) %>%
    rename(pract_equiv_02 = pract_equiv) %>%
    select(model, pract_equiv_02, probability)

  contrast_models_auc_01[[score]] <- summary(diff, size = 0.01) %>%
    mutate(model = score) %>%
    rename(pract_equiv_01 = pract_equiv) %>%
    select(model, pract_equiv_01)

  contrast_models_auc_005[[score]] <- summary(diff, size = 0.005) %>%
    mutate(model = score) %>%
    rename(pract_equiv_005 = pract_equiv) %>%
    select(model, pract_equiv_005)
}

# Probability = proportion of the posterior that is > 0 (probability that the positive difference is real)
# Per-model 95% interval table joined with the contrast summaries.
# tidy() is given the same seed as the best-model lookup so the table is
# reproducible and agrees with the reference chosen above.
#   CI_95      1 when the model's upper bound is >= the largest lower bound
#   ROPE_*     1 when pract_equiv at that size is >= 0.95
#   prob_dif   0 when probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, as a string
auc_ci_multiancestry_score <- perf_model_auc_multiancestry_score %>%
  broom::tidy(seed = 7) %>%
  summary(prob = 0.95) %>%
  mutate(CI_95 = ifelse(upper >= max(lower), 1, 0)) %>%
  merge(bind_rows(contrast_models_auc_005), by = "model") %>%
  mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_01), by = "model") %>%
  mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_02), by = "model") %>%
  mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
  mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
  mutate(metric = "AU ROC") %>%
  mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
  arrange(desc(mean))

In [None]:
# Show the model picked as the AUC reference (largest posterior mean).
head(best_auc_multiancestry_score)

### EUR individuals

#### brier

In [None]:
# Brier (EUR individuals)
# Reference model = the one with the smallest posterior mean Brier score.
best_brier_eur <- tidy(perf_model_brier_eur, seed = 7) %>%
  summary() %>%
  slice_min(mean) %>%
  pull(model)

# Per-score contrast summaries at ROPE sizes 0.02, 0.01 and 0.005
contrast_models_brier_02 <- list()
contrast_models_brier_01 <- list()
contrast_models_brier_005 <- list()

for (score in score_list_eur) {
  # contrast this score against the reference model
  diff <- contrast_models(
    perf_model_brier_eur,
    list_1 = score,
    list_2 = best_brier_eur,
    seed = 7
  )

  # not read in this cell; kept so the global `model` is unchanged
  model <- score

  contrast_models_brier_02[[score]] <- summary(diff, size = 0.02) %>%
    mutate(model = score) %>%
    rename(pract_equiv_02 = pract_equiv) %>%
    select(model, pract_equiv_02, probability)

  contrast_models_brier_01[[score]] <- summary(diff, size = 0.01) %>%
    mutate(model = score) %>%
    rename(pract_equiv_01 = pract_equiv) %>%
    select(model, pract_equiv_01)

  contrast_models_brier_005[[score]] <- summary(diff, size = 0.005) %>%
    mutate(model = score) %>%
    rename(pract_equiv_005 = pract_equiv) %>%
    select(model, pract_equiv_005)
}

# Per-model 95% interval table joined with the contrast summaries.
# tidy() is given the same seed as the best-model lookup so the table is
# reproducible and agrees with the reference chosen above.
#   CI_95      1 when the model's lower bound is <= the smallest upper bound
#   ROPE_*     1 when pract_equiv at that size is >= 0.95
#   prob_dif   0 when probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, as a string
brier_ci_eur <- perf_model_brier_eur %>%
  broom::tidy(seed = 7) %>%
  summary(prob = 0.95) %>%
  mutate(CI_95 = ifelse(lower <= min(upper), 1, 0)) %>%
  merge(bind_rows(contrast_models_brier_005), by = "model") %>%
  mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_brier_01), by = "model") %>%
  mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_brier_02), by = "model") %>%
  mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
  mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
  mutate(metric = "Brier Score") %>%
  mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
  arrange(desc(mean))

In [None]:
# Show the model picked as the Brier reference (smallest posterior mean).
head(best_brier_eur)

#### auroc

In [None]:
# AUC (EUR individuals)
# Reference model = the one with the largest posterior mean ROC AUC.
best_auc_eur <- tidy(perf_model_auc_eur, seed = 7) %>%
  summary() %>%
  slice_max(mean) %>%
  pull(model)

# Per-score contrast summaries at ROPE sizes 0.02, 0.01 and 0.005
contrast_models_auc_02 <- list()
contrast_models_auc_01 <- list()
contrast_models_auc_005 <- list()

for (score in score_list_eur) {
  # contrast the reference model against this score
  diff <- contrast_models(
    perf_model_auc_eur,
    list_1 = best_auc_eur,
    list_2 = score,
    seed = 7
  )

  # not read in this cell; kept so the global `model` is unchanged
  model <- score

  contrast_models_auc_02[[score]] <- summary(diff, size = 0.02) %>%
    mutate(model = score) %>%
    rename(pract_equiv_02 = pract_equiv) %>%
    select(model, pract_equiv_02, probability)

  contrast_models_auc_01[[score]] <- summary(diff, size = 0.01) %>%
    mutate(model = score) %>%
    rename(pract_equiv_01 = pract_equiv) %>%
    select(model, pract_equiv_01)

  contrast_models_auc_005[[score]] <- summary(diff, size = 0.005) %>%
    mutate(model = score) %>%
    rename(pract_equiv_005 = pract_equiv) %>%
    select(model, pract_equiv_005)
}

# Probability = proportion of the posterior that is > 0 (probability that the positive difference is real)
# Per-model 95% interval table joined with the contrast summaries.
# tidy() is given the same seed as the best-model lookup so the table is
# reproducible and agrees with the reference chosen above.
#   CI_95      1 when the model's upper bound is >= the largest lower bound
#   ROPE_*     1 when pract_equiv at that size is >= 0.95
#   prob_dif   0 when probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, as a string
auc_ci_eur <- perf_model_auc_eur %>%
  broom::tidy(seed = 7) %>%
  summary(prob = 0.95) %>%
  mutate(CI_95 = ifelse(upper >= max(lower), 1, 0)) %>%
  merge(bind_rows(contrast_models_auc_005), by = "model") %>%
  mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_01), by = "model") %>%
  mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_02), by = "model") %>%
  mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
  mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
  mutate(metric = "AU ROC") %>%
  mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
  arrange(desc(mean))

In [None]:
# Show the model picked as the AUC reference (largest posterior mean).
head(best_auc_eur)

### eur individuals with multi ancestry scores

#### brier

In [None]:
# Brier (EUR individuals, multi-ancestry scores)
# Reference model = the one with the smallest posterior mean Brier score.
best_brier_eur_multiancestry_score <- tidy(perf_model_brier_eur_multiancestry_score, seed = 7) %>%
  summary() %>%
  slice_min(mean) %>%
  pull(model)

# Per-score contrast summaries at ROPE sizes 0.02, 0.01 and 0.005
contrast_models_brier_02 <- list()
contrast_models_brier_01 <- list()
contrast_models_brier_005 <- list()

for (score in score_list_eur_multiancestry) {
  # contrast this score against the reference model
  diff <- contrast_models(
    perf_model_brier_eur_multiancestry_score,
    list_1 = score,
    list_2 = best_brier_eur_multiancestry_score,
    seed = 7
  )

  # not read in this cell; kept so the global `model` is unchanged
  model <- score

  contrast_models_brier_02[[score]] <- summary(diff, size = 0.02) %>%
    mutate(model = score) %>%
    rename(pract_equiv_02 = pract_equiv) %>%
    select(model, pract_equiv_02, probability)

  contrast_models_brier_01[[score]] <- summary(diff, size = 0.01) %>%
    mutate(model = score) %>%
    rename(pract_equiv_01 = pract_equiv) %>%
    select(model, pract_equiv_01)

  contrast_models_brier_005[[score]] <- summary(diff, size = 0.005) %>%
    mutate(model = score) %>%
    rename(pract_equiv_005 = pract_equiv) %>%
    select(model, pract_equiv_005)
}

# Per-model 95% interval table joined with the contrast summaries.
# tidy() is given the same seed as the best-model lookup so the table is
# reproducible and agrees with the reference chosen above.
#   CI_95      1 when the model's lower bound is <= the smallest upper bound
#   ROPE_*     1 when pract_equiv at that size is >= 0.95
#   prob_dif   0 when probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, as a string
brier_ci_eur_multiancestry_score <- perf_model_brier_eur_multiancestry_score %>%
  broom::tidy(seed = 7) %>%
  summary(prob = 0.95) %>%
  mutate(CI_95 = ifelse(lower <= min(upper), 1, 0)) %>%
  merge(bind_rows(contrast_models_brier_005), by = "model") %>%
  mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_brier_01), by = "model") %>%
  mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_brier_02), by = "model") %>%
  mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
  mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
  mutate(metric = "Brier Score") %>%
  mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
  arrange(desc(mean))

In [None]:
# Show the model picked as the Brier reference (smallest posterior mean).
head(best_brier_eur_multiancestry_score)

#### auroc

In [None]:
# AUC (EUR individuals, multi-ancestry scores)
# Reference model = the one with the largest posterior mean ROC AUC.
best_auc_eur_multiancestry_score <- tidy(perf_model_auc_eur_multiancestry_score, seed = 7) %>%
  summary() %>%
  slice_max(mean) %>%
  pull(model)

# Per-score contrast summaries at ROPE sizes 0.02, 0.01 and 0.005
contrast_models_auc_02 <- list()
contrast_models_auc_01 <- list()
contrast_models_auc_005 <- list()

for (score in score_list_eur_multiancestry) {
  # contrast the reference model against this score
  diff <- contrast_models(
    perf_model_auc_eur_multiancestry_score,
    list_1 = best_auc_eur_multiancestry_score,
    list_2 = score,
    seed = 7
  )

  # not read in this cell; kept so the global `model` is unchanged
  model <- score

  contrast_models_auc_02[[score]] <- summary(diff, size = 0.02) %>%
    mutate(model = score) %>%
    rename(pract_equiv_02 = pract_equiv) %>%
    select(model, pract_equiv_02, probability)

  contrast_models_auc_01[[score]] <- summary(diff, size = 0.01) %>%
    mutate(model = score) %>%
    rename(pract_equiv_01 = pract_equiv) %>%
    select(model, pract_equiv_01)

  contrast_models_auc_005[[score]] <- summary(diff, size = 0.005) %>%
    mutate(model = score) %>%
    rename(pract_equiv_005 = pract_equiv) %>%
    select(model, pract_equiv_005)
}

# Probability = proportion of the posterior that is > 0 (probability that the positive difference is real)
# Per-model 95% interval table joined with the contrast summaries.
# tidy() is given the same seed as the best-model lookup so the table is
# reproducible and agrees with the reference chosen above.
#   CI_95      1 when the model's upper bound is >= the largest lower bound
#   ROPE_*     1 when pract_equiv at that size is >= 0.95
#   prob_dif   0 when probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, as a string
auc_ci_eur_multiancestry_score <- perf_model_auc_eur_multiancestry_score %>%
  broom::tidy(seed = 7) %>%
  summary(prob = 0.95) %>%
  mutate(CI_95 = ifelse(upper >= max(lower), 1, 0)) %>%
  merge(bind_rows(contrast_models_auc_005), by = "model") %>%
  mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_01), by = "model") %>%
  mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_02), by = "model") %>%
  mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
  mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
  mutate(metric = "AU ROC") %>%
  mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
  arrange(desc(mean))

In [None]:
# Show the model picked as the AUC reference (largest posterior mean).
head(best_auc_eur_multiancestry_score)

### afr

#### brier

In [None]:
# Brier (AFR individuals)
# Reference model = the one with the smallest posterior mean Brier score.
best_brier_afr <- tidy(perf_model_brier_afr, seed = 7) %>%
  summary() %>%
  slice_min(mean) %>%
  pull(model)

# Per-score contrast summaries at ROPE sizes 0.02, 0.01 and 0.005
contrast_models_brier_02 <- list()
contrast_models_brier_01 <- list()
contrast_models_brier_005 <- list()

for (score in score_list_afr) {
  # contrast this score against the reference model
  diff <- contrast_models(
    perf_model_brier_afr,
    list_1 = score,
    list_2 = best_brier_afr,
    seed = 7
  )

  # not read in this cell; kept so the global `model` is unchanged
  model <- score

  contrast_models_brier_02[[score]] <- summary(diff, size = 0.02) %>%
    mutate(model = score) %>%
    rename(pract_equiv_02 = pract_equiv) %>%
    select(model, pract_equiv_02, probability)

  contrast_models_brier_01[[score]] <- summary(diff, size = 0.01) %>%
    mutate(model = score) %>%
    rename(pract_equiv_01 = pract_equiv) %>%
    select(model, pract_equiv_01)

  contrast_models_brier_005[[score]] <- summary(diff, size = 0.005) %>%
    mutate(model = score) %>%
    rename(pract_equiv_005 = pract_equiv) %>%
    select(model, pract_equiv_005)
}

# Per-model 95% interval table joined with the contrast summaries.
# tidy() is given the same seed as the best-model lookup so the table is
# reproducible and agrees with the reference chosen above.
#   CI_95      1 when the model's lower bound is <= the smallest upper bound
#   ROPE_*     1 when pract_equiv at that size is >= 0.95
#   prob_dif   0 when probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, as a string
brier_ci_afr <- perf_model_brier_afr %>%
  broom::tidy(seed = 7) %>%
  summary(prob = 0.95) %>%
  mutate(CI_95 = ifelse(lower <= min(upper), 1, 0)) %>%
  merge(bind_rows(contrast_models_brier_005), by = "model") %>%
  mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_brier_01), by = "model") %>%
  mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_brier_02), by = "model") %>%
  mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
  mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
  mutate(metric = "Brier Score") %>%
  mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
  arrange(desc(mean))

In [None]:
# Show the model picked as the Brier reference (smallest posterior mean).
head(best_brier_afr)

#### auc

In [None]:
# AUC (AFR individuals)
# Reference model = the one with the largest posterior mean ROC AUC.
best_auc_afr <- tidy(perf_model_auc_afr, seed = 7) %>%
  summary() %>%
  slice_max(mean) %>%
  pull(model)

# Per-score contrast summaries at ROPE sizes 0.02, 0.01 and 0.005
contrast_models_auc_02 <- list()
contrast_models_auc_01 <- list()
contrast_models_auc_005 <- list()

for (score in score_list_afr) {
  # contrast the reference model against this score
  diff <- contrast_models(
    perf_model_auc_afr,
    list_1 = best_auc_afr,
    list_2 = score,
    seed = 7
  )

  # not read in this cell; kept so the global `model` is unchanged
  model <- score

  contrast_models_auc_02[[score]] <- summary(diff, size = 0.02) %>%
    mutate(model = score) %>%
    rename(pract_equiv_02 = pract_equiv) %>%
    select(model, pract_equiv_02, probability)

  contrast_models_auc_01[[score]] <- summary(diff, size = 0.01) %>%
    mutate(model = score) %>%
    rename(pract_equiv_01 = pract_equiv) %>%
    select(model, pract_equiv_01)

  contrast_models_auc_005[[score]] <- summary(diff, size = 0.005) %>%
    mutate(model = score) %>%
    rename(pract_equiv_005 = pract_equiv) %>%
    select(model, pract_equiv_005)
}

# Probability = proportion of the posterior that is > 0 (probability that the positive difference is real)
# Per-model 95% interval table joined with the contrast summaries.
# tidy() is given the same seed as the best-model lookup so the table is
# reproducible and agrees with the reference chosen above.
#   CI_95      1 when the model's upper bound is >= the largest lower bound
#   ROPE_*     1 when pract_equiv at that size is >= 0.95
#   prob_dif   0 when probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, as a string
auc_ci_afr <- perf_model_auc_afr %>%
  broom::tidy(seed = 7) %>%
  summary(prob = 0.95) %>%
  mutate(CI_95 = ifelse(upper >= max(lower), 1, 0)) %>%
  merge(bind_rows(contrast_models_auc_005), by = "model") %>%
  mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_01), by = "model") %>%
  mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_02), by = "model") %>%
  mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
  mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
  mutate(metric = "AU ROC") %>%
  mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
  arrange(desc(mean))

In [None]:
# Show the model picked as the AUC reference (largest posterior mean).
head(best_auc_afr)

### afr individuals with multiancestry scores

#### brier

In [None]:
# Brier (AFR individuals, multi-ancestry scores)
# Reference model = the one with the smallest posterior mean Brier score.
best_brier_afr_multiancestry_score <- tidy(perf_model_brier_afr_multiancestry_score, seed = 7) %>%
  summary() %>%
  slice_min(mean) %>%
  pull(model)

# Per-score contrast summaries at ROPE sizes 0.02, 0.01 and 0.005
contrast_models_brier_02 <- list()
contrast_models_brier_01 <- list()
contrast_models_brier_005 <- list()

for (score in score_list_afr_multiancestry) {
  # contrast this score against the reference model
  diff <- contrast_models(
    perf_model_brier_afr_multiancestry_score,
    list_1 = score,
    list_2 = best_brier_afr_multiancestry_score,
    seed = 7
  )

  # not read in this cell; kept so the global `model` is unchanged
  model <- score

  contrast_models_brier_02[[score]] <- summary(diff, size = 0.02) %>%
    mutate(model = score) %>%
    rename(pract_equiv_02 = pract_equiv) %>%
    select(model, pract_equiv_02, probability)

  contrast_models_brier_01[[score]] <- summary(diff, size = 0.01) %>%
    mutate(model = score) %>%
    rename(pract_equiv_01 = pract_equiv) %>%
    select(model, pract_equiv_01)

  contrast_models_brier_005[[score]] <- summary(diff, size = 0.005) %>%
    mutate(model = score) %>%
    rename(pract_equiv_005 = pract_equiv) %>%
    select(model, pract_equiv_005)
}

# Per-model 95% interval table joined with the contrast summaries.
# tidy() is given the same seed as the best-model lookup so the table is
# reproducible and agrees with the reference chosen above.
#   CI_95      1 when the model's lower bound is <= the smallest upper bound
#   ROPE_*     1 when pract_equiv at that size is >= 0.95
#   prob_dif   0 when probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, as a string
brier_ci_afr_multiancestry_score <- perf_model_brier_afr_multiancestry_score %>%
  broom::tidy(seed = 7) %>%
  summary(prob = 0.95) %>%
  mutate(CI_95 = ifelse(lower <= min(upper), 1, 0)) %>%
  merge(bind_rows(contrast_models_brier_005), by = "model") %>%
  mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_brier_01), by = "model") %>%
  mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_brier_02), by = "model") %>%
  mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
  mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
  mutate(metric = "Brier Score") %>%
  mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
  arrange(desc(mean))

In [None]:
# Display the model name selected as the Brier-score reference.
head(best_brier_afr_multiancestry_score)

#### auroc

In [None]:
# AUC
# Reference model: the one with the highest posterior mean AU ROC.
# with_ties = FALSE guarantees a single model name, which contrast_models()
# needs because list_1 and list_2 are paired element by element.
best_auc_afr_multiancestry_score <-
  tidy(perf_model_auc_afr_multiancestry_score, seed = 7) %>%
  summary() %>%
  slice_max(mean, with_ties = FALSE) %>%
  pull(model)

# One summary row per score, collected separately for each ROPE size.
contrast_models_auc_02 <- list()
contrast_models_auc_01 <- list()
contrast_models_auc_005 <- list()

# The loop variable is deliberately not called `score`: that name holds the
# PGS table read at the top of the notebook and a loop would overwrite it.
for (pgs_id in score_list_afr_multiancestry) {
  # Posterior contrast of the reference model against this score (argument
  # order is reversed relative to the Brier cell).
  contrast_post <- contrast_models(
    perf_model_auc_afr_multiancestry_score,
    list_1 = best_auc_afr_multiancestry_score,
    list_2 = pgs_id,
    seed = 7
  )

  # The 0.02 summary also carries `probability`, used later for prob_dif.
  contrast_models_auc_02[[pgs_id]] <- contrast_post %>%
    summary(size = 0.02) %>%
    mutate(model = pgs_id) %>%
    rename(pract_equiv_02 = pract_equiv) %>%
    select(model, pract_equiv_02, probability)

  contrast_models_auc_01[[pgs_id]] <- contrast_post %>%
    summary(size = 0.01) %>%
    mutate(model = pgs_id) %>%
    rename(pract_equiv_01 = pract_equiv) %>%
    select(model, pract_equiv_01)

  contrast_models_auc_005[[pgs_id]] <- contrast_post %>%
    summary(size = 0.005) %>%
    mutate(model = pgs_id) %>%
    rename(pract_equiv_005 = pract_equiv) %>%
    select(model, pract_equiv_005)
}

# Probability = proportion of the posterior that is > 0 (probability that the positive difference is real)
# Per-model posterior summary joined with the equivalence probabilities.
# tidy() gets the same fixed seed as above so the table is reproducible and
# is built from the same draws that selected the reference model.
auc_ci_afr_multiancestry_score <- perf_model_auc_afr_multiancestry_score %>%
  broom::tidy(seed = 7) %>%
  summary(prob = 0.95) %>% # summary, probability 95%
  # CI_95 = 1 when the upper bound reaches the largest lower bound of any model.
  mutate(CI_95 = ifelse(upper >= max(lower), 1, 0)) %>%
  merge(bind_rows(contrast_models_auc_005), by = "model") %>%
  mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_01), by = "model") %>%
  mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_02), by = "model") %>%
  mutate(
    ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1),
    # prob_dif = 0 when the contrast probability is at least 0.95, else 1.
    prob_dif = ifelse(probability >= 0.95, 0, 1),
    metric = "AU ROC",
    # Number of ROPE sizes (0 to 3) at which the model is flagged equivalent.
    ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)
  ) %>%
  arrange(desc(mean))

In [None]:
# Display the model name selected as the AU ROC reference.
head(best_auc_afr_multiancestry_score)

## combine models

### all individuals

In [None]:
# AUC AND Brier
# Stack the Brier-score table on top of the AU ROC table, then show the
# row count and the first rows as a quick check.
model_metrics_df <- rbind(brier_ci, auc_ci)
nrow(model_metrics_df)
head(model_metrics_df)

### all individuals with multi ancestry scores

In [None]:
# AUC AND Brier
# Stack the Brier-score table on top of the AU ROC table, then show the
# row count and the first rows as a quick check.
model_metrics_df_multiancestry_score <- rbind(brier_ci_multiancestry_score, auc_ci_multiancestry_score)
nrow(model_metrics_df_multiancestry_score)
head(model_metrics_df_multiancestry_score)

### eur individuals

In [None]:
# AUC AND Brier
# Stack the Brier-score table on top of the AU ROC table, then show the
# row count and the first rows as a quick check.
model_metrics_df_eur <- rbind(brier_ci_eur, auc_ci_eur)
nrow(model_metrics_df_eur)
head(model_metrics_df_eur)

### eur individuals with multi ancestry scores

In [None]:
# AUC AND Brier
# Stack the Brier-score table on top of the AU ROC table, then show the
# row count and the first rows as a quick check.
model_metrics_df_eur_multiancestry_score <- rbind(brier_ci_eur_multiancestry_score, auc_ci_eur_multiancestry_score)
nrow(model_metrics_df_eur_multiancestry_score)
head(model_metrics_df_eur_multiancestry_score)

### afr individuals

In [None]:
# AUC AND Brier
# Stack the Brier-score table on top of the AU ROC table, then show the
# row count and the first rows as a quick check.
model_metrics_df_afr <- rbind(brier_ci_afr, auc_ci_afr)
nrow(model_metrics_df_afr)
head(model_metrics_df_afr)

### afr individuals with multi ancestry scores

In [None]:
# AUC AND Brier
# Stack the Brier-score table on top of the AU ROC table, then show the
# row count and the first rows as a quick check.
model_metrics_df_afr_multiancestry_score <- rbind(brier_ci_afr_multiancestry_score, auc_ci_afr_multiancestry_score)
nrow(model_metrics_df_afr_multiancestry_score)
head(model_metrics_df_afr_multiancestry_score)

## export

### all individuals

In [None]:
# Save output
# Tab-separated, header row, no row names, no quoting. TRUE/FALSE are
# spelled out because T and F are ordinary variables that can be reassigned.
write.table(
  model_metrics_df,
  "output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

### all individuals with multiancestry scores

In [None]:
# Save output
# Tab-separated, header row, no row names, no quoting. TRUE/FALSE are
# spelled out because T and F are ordinary variables that can be reassigned.
write.table(
  model_metrics_df_multiancestry_score,
  "output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

### eur individuals

In [None]:
# Save output
# Tab-separated, header row, no row names, no quoting. TRUE/FALSE are
# spelled out because T and F are ordinary variables that can be reassigned.
write.table(
  model_metrics_df_eur,
  "output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

### eur individuals with multiancestry scores

In [None]:
# Save output
# Tab-separated, header row, no row names, no quoting. TRUE/FALSE are
# spelled out because T and F are ordinary variables that can be reassigned.
write.table(
  model_metrics_df_eur_multiancestry_score,
  "output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

### afr individuals

In [None]:
# Save output
# Tab-separated, header row, no row names, no quoting. TRUE/FALSE are
# spelled out because T and F are ordinary variables that can be reassigned.
write.table(
  model_metrics_df_afr,
  "output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

### afr individuals with multiancestry scores

In [None]:
# Save output
# Tab-separated, header row, no row names, no quoting. TRUE/FALSE are
# spelled out because T and F are ordinary variables that can be reassigned.
write.table(
  model_metrics_df_afr_multiancestry_score,
  "output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

## make and export variance plots

### read in model metrics df (if needed)

In [None]:
# Reload the EUR metrics table from the file written by the export step.
# The path now points at the PMBB v3 export; the earlier PMBB_v2 path would
# have replaced the v3 table with older results before the v3 plots are drawn.
model_metrics_df_eur <- read.csv(
  "output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t"
)

In [None]:
# Reload the AFR metrics table from the file written by the export step.
# The path now points at the PMBB v3 export; the earlier PMBB_v2 path would
# have replaced the v3 table with older results before the v3 plots are drawn.
model_metrics_df_afr <- read.csv(
  "output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t"
)

### All

#### ROPE 0.02

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.02.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_02 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_02 == 1) could never match and has been removed.
model_metrics_plotCI_rope_02 <- model_metrics_df %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_02 == 1 & prob_dif != 1 ~ "1",
    ROPE_02 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 ALL CKD Variance by ROPE 0.02"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.02)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_02
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.02.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_02
)

#### ROPE 0.01

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.01.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_01 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_01 == 1) could never match and has been removed.
model_metrics_plotCI_rope_01 <- model_metrics_df %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_01 == 1 & prob_dif != 1 ~ "1",
    ROPE_01 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 ALL CKD Variance by ROPE 0.01"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.01)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_01
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.01.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_01
)

#### ROPE 0.005

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.005.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_005 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_005 == 1) could never match and has been removed.
model_metrics_plotCI_rope_005 <- model_metrics_df %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_005 == 1 & prob_dif != 1 ~ "1",
    ROPE_005 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 ALL CKD Variance by ROPE 0.005"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.005)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_005
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.005.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_005
)

### all individuals with multiancestry scores

#### ROPE 0.02

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.02.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_02 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_02 == 1) could never match and has been removed.
model_metrics_plotCI_rope_02_multiancestry_score <- model_metrics_df_multiancestry_score %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_02 == 1 & prob_dif != 1 ~ "1",
    ROPE_02 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 ALL CKD Variance by ROPE 0.02"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.02)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_02_multiancestry_score
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.02.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_02_multiancestry_score
)

#### ROPE 0.01

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.01.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_01 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_01 == 1) could never match and has been removed.
model_metrics_plotCI_rope_01_multiancestry_score <- model_metrics_df_multiancestry_score %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_01 == 1 & prob_dif != 1 ~ "1",
    ROPE_01 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 ALL CKD Variance by ROPE 0.01"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.01)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_01_multiancestry_score
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.01.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_01_multiancestry_score
)

#### ROPE 0.005

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.005.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_005 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_005 == 1) could never match and has been removed.
model_metrics_plotCI_rope_005_multiancestry_score <- model_metrics_df_multiancestry_score %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_005 == 1 & prob_dif != 1 ~ "1",
    ROPE_005 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 ALL CKD Variance by ROPE 0.005"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.005)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_005_multiancestry_score
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.005.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_005_multiancestry_score
)

### EUR

#### ROPE 0.02

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.02.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_02 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_02 == 1) could never match and has been removed.
model_metrics_plotCI_rope_02_eur <- model_metrics_df_eur %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_02 == 1 & prob_dif != 1 ~ "1",
    ROPE_02 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 EUR CKD Variance by ROPE 0.02"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.02)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_02_eur
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.02.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_02_eur
)

#### ROPE 0.01

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.01.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_01 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_01 == 1) could never match and has been removed.
model_metrics_plotCI_rope_01_eur <- model_metrics_df_eur %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_01 == 1 & prob_dif != 1 ~ "1",
    ROPE_01 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 EUR CKD Variance by ROPE 0.01"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.01)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_01_eur
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.01.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_01_eur
)

#### ROPE 0.005

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.005.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_005 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_005 == 1) could never match and has been removed.
model_metrics_plotCI_rope_005_eur <- model_metrics_df_eur %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_005 == 1 & prob_dif != 1 ~ "1",
    ROPE_005 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 EUR CKD Variance by ROPE 0.005"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.005)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_005_eur
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.005.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_005_eur
)

### EUR individuals with multi ancestry scores

#### ROPE 0.02

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.02.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_02 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_02 == 1) could never match and has been removed.
model_metrics_plotCI_rope_02_eur_multiancestry_score <- model_metrics_df_eur_multiancestry_score %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_02 == 1 & prob_dif != 1 ~ "1",
    ROPE_02 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 EUR CKD Variance by ROPE 0.02"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.02)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_02_eur_multiancestry_score
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.02.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_02_eur_multiancestry_score
)

#### ROPE 0.01

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.01.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_01 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_01 == 1) could never match and has been removed.
model_metrics_plotCI_rope_01_eur_multiancestry_score <- model_metrics_df_eur_multiancestry_score %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_01 == 1 & prob_dif != 1 ~ "1",
    ROPE_01 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 EUR CKD Variance by ROPE 0.01"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.01)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_01_eur_multiancestry_score
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.01.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_01_eur_multiancestry_score
)

#### ROPE 0.005

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.005.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_005 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_005 == 1) could never match and has been removed.
model_metrics_plotCI_rope_005_eur_multiancestry_score <- model_metrics_df_eur_multiancestry_score %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_005 == 1 & prob_dif != 1 ~ "1",
    ROPE_005 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 EUR CKD Variance by ROPE 0.005"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.005)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_005_eur_multiancestry_score
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.005.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_005_eur_multiancestry_score
)

### AFR

#### ROPE 0.02

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.02.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_02 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_02 == 1) could never match and has been removed.
model_metrics_plotCI_rope_02_afr <- model_metrics_df_afr %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_02 == 1 & prob_dif != 1 ~ "1",
    ROPE_02 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 AFR CKD Variance by ROPE 0.02"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.02)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_02_afr
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.02.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_02_afr
)

#### ROPE 0.01

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.01.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_01 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_01 == 1) could never match and has been removed.
model_metrics_plotCI_rope_01_afr <- model_metrics_df_afr %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_01 == 1 & prob_dif != 1 ~ "1",
    ROPE_01 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 AFR CKD Variance by ROPE 0.01"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.01)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_01_afr
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.01.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_01_afr
)

#### ROPE 0.005

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.005.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_005 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_005 == 1) could never match and has been removed.
model_metrics_plotCI_rope_005_afr <- model_metrics_df_afr %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_005 == 1 & prob_dif != 1 ~ "1",
    ROPE_005 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 AFR CKD Variance by ROPE 0.005"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.005)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_005_afr
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.005.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_005_afr
)

### afr individuals with multi ancestry scores

#### ROPE 0.02

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.02.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_02 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_02 == 1) could never match and has been removed.
model_metrics_plotCI_rope_02_afr_multiancestry_score <- model_metrics_df_afr_multiancestry_score %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_02 == 1 & prob_dif != 1 ~ "1",
    ROPE_02 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 AFR CKD Variance by ROPE 0.02"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.02)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_02_afr_multiancestry_score
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.02.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_02_afr_multiancestry_score
)

#### ROPE 0.01

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.01.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_01 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_01 == 1) could never match and has been removed.
model_metrics_plotCI_rope_01_afr_multiancestry_score <- model_metrics_df_afr_multiancestry_score %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_01 == 1 & prob_dif != 1 ~ "1",
    ROPE_01 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 AFR CKD Variance by ROPE 0.01"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.01)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_01_afr_multiancestry_score
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.01.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_01_afr_multiancestry_score
)

#### ROPE 0.005

In [None]:
# Point-and-interval plot of every score (mean with lower/upper bounds),
# one panel per metric, coloured by equivalence class at ROPE 0.005.
# sig_color: "2" when prob_dif == 1; otherwise "1" when ROPE_005 == 1 and
# "0" when it is not. The former second branch
# (prob_dif == 1 & ROPE_005 == 1) could never match and has been removed.
model_metrics_plotCI_rope_005_afr_multiancestry_score <- model_metrics_df_afr_multiancestry_score %>%
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_005 == 1 & prob_dif != 1 ~ "1",
    ROPE_005 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  # Factor levels follow the row order of the models within each metric.
  mutate(name = factor(model, levels = model)) %>%
  ungroup() %>%
  ggplot(aes(
    x = mean, y = name, xmin = lower, xmax = upper, color = sig_color
  )) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(
    x = "95% Credible Interval ",
    y = "Score",
    title = "PMBB v3 AFR CKD Variance by ROPE 0.005"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    strip.text = element_text(size = 14, colour = "black")
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c(
      "0" = "Significantly Different",
      "1" = "Practically Equivalent (ROPE 0.005)",
      "2" = "Statistically Equivalent"
    )
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_005_afr_multiancestry_score
# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(
  "output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.005.equivalance_plot.png",
  plot = model_metrics_plotCI_rope_005_afr_multiancestry_score
)

# individual score percentile

## read in input file (if needed)

In [None]:
# Reload the tab-separated metrics table for all individuals from disk.
model_metrics_df <- read.csv(
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt',
  sep = '\t'
)

In [None]:
# Tab-separated performance metrics for the EUR models
# (read.delim is read.csv with sep = "\t" as its default)
model_metrics_df_eur <- read.delim(
  'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)

In [None]:
# Tab-separated performance metrics for the AFR models
# (read.delim is read.csv with sep = "\t" as its default)
model_metrics_df_afr <- read.delim(
  'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)

## calculate

### all individuals

In [None]:
# Drop the scores whose PGS name matches the exclusion pattern, then express
# Z_norm2 as a percentile through the standard normal CDF.
df_ntile_norm <- all_score %>%
  filter(
    !grepl(
      'PGS005090|PGS002757|AFR.eGFR.PRScsx|AMR.eGFR.PRScsx|EAS.eGFR.PRScsx|EUR.eGFR.PRScsx',
      PGS
    )
  ) %>%
  dplyr::mutate(ntile = pnorm(Z_norm2) * 100)
head(df_ntile_norm)
# number of scores retained
dplyr::n_distinct(df_ntile_norm$PGS)

### all individuals multiancestry scores

In [None]:
# Same percentile transform, with a broader exclusion: any PGS name containing
# AFR, AMR or EAS is removed, plus the listed PGS IDs and EUR.eGFR.PRScsx.
df_ntile_norm_multiancestry_score <- all_score %>%
  filter(
    !grepl(
      "PGS002757|PGS005090|AFR|AMR|EAS|EUR.eGFR.PRScsx",
      PGS
    )
  ) %>%
  dplyr::mutate(ntile = pnorm(Z_norm2) * 100)
head(df_ntile_norm_multiancestry_score)
# number of scores retained
dplyr::n_distinct(df_ntile_norm_multiancestry_score$PGS)

### eur

In [None]:
# EUR table: drop the scores whose PGS name matches the exclusion pattern, then
# express Z_norm2 as a percentile through the standard normal CDF.
df_ntile_norm_eur <- all_score_eur %>%
  filter(
    !grepl(
      'PGS005090|PGS002757|AFR.eGFR.PRScsx|AMR.eGFR.PRScsx|EAS.eGFR.PRScsx|EUR.eGFR.PRScsx',
      PGS
    )
  ) %>%
  dplyr::mutate(ntile = pnorm(Z_norm2) * 100)
head(df_ntile_norm_eur)
# number of scores retained
dplyr::n_distinct(df_ntile_norm_eur$PGS)

### eur individuals multiancestry scores

In [None]:
# EUR table with the broader exclusion: any PGS name containing AFR, AMR or
# EAS is removed, plus the listed PGS IDs and EUR.eGFR.PRScsx.
df_ntile_norm_eur_multiancestry_score <- all_score_eur %>%
  filter(
    !grepl(
      "PGS002757|PGS005090|AFR|AMR|EAS|EUR.eGFR.PRScsx",
      PGS
    )
  ) %>%
  dplyr::mutate(ntile = pnorm(Z_norm2) * 100)
head(df_ntile_norm_eur_multiancestry_score)
# number of scores retained
dplyr::n_distinct(df_ntile_norm_eur_multiancestry_score$PGS)

### afr

In [None]:
# AFR table: unlike the ALL/EUR cells this is an allow-list. Only PGS names
# matching the pattern are kept before converting Z_norm2 to a percentile.
df_ntile_norm_afr <- all_score_afr %>%
  filter(
    grepl(
      'AFR.Phe_585.3.PRScsx|AFR.eGFR.flip.PRScsx|AMR.eGFR.flip.PRScsx|EAS.Phe_585.3.PRScsx|EAS.eGFR.flip.PRScsx|EUR.Phe_585.3.PRScsx|EUR.eGFR.flip.PRScsx|PGS000728|PGS002237|PGS003988|PGS004004|PGS004016|PGS004030|PGS004045|PGS004058|PGS004074|PGS004088|PGS004101|PGS004128|PGS004142|PGS004158|PGS004889|PGS005113',
      PGS
    )
  ) %>%
  dplyr::mutate(ntile = pnorm(Z_norm2) * 100)
head(df_ntile_norm_afr)
# number of scores retained
dplyr::n_distinct(df_ntile_norm_afr$PGS)

### afr individuals multiancestry scores

In [None]:
# AFR table, allow-list restricted to the EUR PRScsx scores and the listed
# PGS IDs; Z_norm2 is then converted to a percentile.
df_ntile_norm_afr_multiancestry_score <- all_score_afr %>%
  filter(
    grepl(
      'EUR.Phe_585.3.PRScsx|EUR.eGFR.flip.PRScsx|PGS000728|PGS002237|PGS003988|PGS004004|PGS004016|PGS004030|PGS004045|PGS004058|PGS004074|PGS004088|PGS004101|PGS004128|PGS004142|PGS004158|PGS004889|PGS005113',
      PGS
    )
  ) %>%
  dplyr::mutate(ntile = pnorm(Z_norm2) * 100)
head(df_ntile_norm_afr_multiancestry_score)
# number of scores retained
dplyr::n_distinct(df_ntile_norm_afr_multiancestry_score$PGS)

### PRScs iterations

In [None]:
# Percentile of every Z_norm2 value in afr_egfr_merge (standard normal CDF x 100)
df_ntile_norm_afr_egfr <- dplyr::mutate(afr_egfr_merge, ntile = pnorm(Z_norm2) * 100)
head(df_ntile_norm_afr_egfr)
dplyr::n_distinct(df_ntile_norm_afr_egfr$PGS)

In [None]:
# Percentile of every Z_norm2 value in eur_egfr_merge (standard normal CDF x 100)
df_ntile_norm_eur_egfr <- dplyr::mutate(eur_egfr_merge, ntile = pnorm(Z_norm2) * 100)
head(df_ntile_norm_eur_egfr)
dplyr::n_distinct(df_ntile_norm_eur_egfr$PGS)

In [None]:
# Percentile of every Z_norm2 value in meta_egfr_merge (standard normal CDF x 100)
df_ntile_norm_meta_egfr <- dplyr::mutate(meta_egfr_merge, ntile = pnorm(Z_norm2) * 100)
head(df_ntile_norm_meta_egfr)
dplyr::n_distinct(df_ntile_norm_meta_egfr$PGS)

## reformat df for individual percentile plots

### all individuals

In [None]:
# Reshape to one row per IID with one `ntile_<PGS>` percentile column per
# score, then save the table as tab-separated text.
df_ntile_norm_wide <- pivot_wider(
  df_ntile_norm,
  id_cols = 'IID',
  names_from = 'PGS',
  names_prefix = 'ntile_',
  values_from = 'ntile'
)
dim(df_ntile_norm_wide)
head(df_ntile_norm_wide)
write.table(
  df_ntile_norm_wide,
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### all individuals multiancestry scores

In [None]:
# Reshape to one row per IID with one `ntile_<PGS>` percentile column per
# score, then save the table as tab-separated text.
df_ntile_norm_wide_multiancestry_score <- pivot_wider(
  df_ntile_norm_multiancestry_score,
  id_cols = 'IID',
  names_from = 'PGS',
  names_prefix = 'ntile_',
  values_from = 'ntile'
)
dim(df_ntile_norm_wide_multiancestry_score)
head(df_ntile_norm_wide_multiancestry_score)
write.table(
  df_ntile_norm_wide_multiancestry_score,
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### eur

In [None]:
# Reshape to one row per IID with one `ntile_<PGS>` percentile column per
# score, then save the table as tab-separated text.
df_ntile_norm_wide_eur <- pivot_wider(
  df_ntile_norm_eur,
  id_cols = 'IID',
  names_from = 'PGS',
  names_prefix = 'ntile_',
  values_from = 'ntile'
)
dim(df_ntile_norm_wide_eur)
head(df_ntile_norm_wide_eur)
write.table(
  df_ntile_norm_wide_eur,
  file = 'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### eur individuals multiancestry scores

In [None]:
# Reshape to one row per IID with one `ntile_<PGS>` percentile column per
# score, then save the table as tab-separated text.
df_ntile_norm_wide_eur_multiancestry_score <- pivot_wider(
  df_ntile_norm_eur_multiancestry_score,
  id_cols = 'IID',
  names_from = 'PGS',
  names_prefix = 'ntile_',
  values_from = 'ntile'
)
dim(df_ntile_norm_wide_eur_multiancestry_score)
head(df_ntile_norm_wide_eur_multiancestry_score)
write.table(
  df_ntile_norm_wide_eur_multiancestry_score,
  file = 'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### afr

In [None]:
# Reshape to one row per IID with one `ntile_<PGS>` percentile column per
# score, then save the table as tab-separated text.
df_ntile_norm_wide_afr <- pivot_wider(
  df_ntile_norm_afr,
  id_cols = 'IID',
  names_from = 'PGS',
  names_prefix = 'ntile_',
  values_from = 'ntile'
)
dim(df_ntile_norm_wide_afr)
head(df_ntile_norm_wide_afr)
write.table(
  df_ntile_norm_wide_afr,
  file = 'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### afr individuals multiancestry scores

In [None]:
# Reshape to one row per IID with one `ntile_<PGS>` percentile column per
# score, then save the table as tab-separated text.
df_ntile_norm_wide_afr_multiancestry_score <- pivot_wider(
  df_ntile_norm_afr_multiancestry_score,
  id_cols = 'IID',
  names_from = 'PGS',
  names_prefix = 'ntile_',
  values_from = 'ntile'
)
dim(df_ntile_norm_wide_afr_multiancestry_score)
head(df_ntile_norm_wide_afr_multiancestry_score)
write.table(
  df_ntile_norm_wide_afr_multiancestry_score,
  file = 'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### PRScs iterations

In [None]:
# Reshape the AFR eGFR PRScs-iteration percentiles to one row per IID with one
# `ntile_<PGS>` column per score, then save as tab-separated text.
# NOTE(review): the output name says PMBB_v2 while the other outputs in this
# notebook say PMBB_v3 -- confirm this is intended.
df_ntile_norm_wide_afr_egfr <- pivot_wider(
  df_ntile_norm_afr_egfr,
  id_cols = 'IID',
  names_from = 'PGS',
  names_prefix = 'ntile_',
  values_from = 'ntile'
)
dim(df_ntile_norm_wide_afr_egfr)
head(df_ntile_norm_wide_afr_egfr)
write.table(
  df_ntile_norm_wide_afr_egfr,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.AFR_eGFR_flip.PRScs_iterations.individual_percentile.wide_form.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Reshape the AMR eGFR PRScs-iteration percentiles to one row per IID with one
# `ntile_<PGS>` column per score, then save as tab-separated text.
# NOTE(review): df_ntile_norm_amr_egfr is not created in the "calculate"
# section of this notebook -- confirm it is defined before this cell runs.
# NOTE(review): the output name says PMBB_v2 while the other outputs in this
# notebook say PMBB_v3 -- confirm this is intended.
df_ntile_norm_wide_amr_egfr <- pivot_wider(
  df_ntile_norm_amr_egfr,
  id_cols = 'IID',
  names_from = 'PGS',
  names_prefix = 'ntile_',
  values_from = 'ntile'
)
dim(df_ntile_norm_wide_amr_egfr)
head(df_ntile_norm_wide_amr_egfr)
write.table(
  df_ntile_norm_wide_amr_egfr,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.AMR_eGFR_flip.PRScs_iterations.individual_percentile.wide_form.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### eur egfr- all

In [None]:
# Reshape the EUR eGFR PRScs-iteration percentiles to one row per IID with one
# `ntile_<PGS>` column per score, then save as tab-separated text.
# NOTE(review): the output name says PMBB_v2 while the other outputs in this
# notebook say PMBB_v3 -- confirm this is intended.
df_ntile_norm_wide_eur_egfr <- pivot_wider(
  df_ntile_norm_eur_egfr,
  id_cols = 'IID',
  names_from = 'PGS',
  names_prefix = 'ntile_',
  values_from = 'ntile'
)
dim(df_ntile_norm_wide_eur_egfr)
head(df_ntile_norm_wide_eur_egfr)
write.table(
  df_ntile_norm_wide_eur_egfr,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.EUR_eGFR_flip.PRScs_iterations.individual_percentile.wide_form.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Reshape the EUR Phe_585.3 PRScs-iteration percentiles to one row per IID with
# one `ntile_<PGS>` column per score, then save as tab-separated text.
# NOTE(review): df_ntile_norm_eur_phe is not created in the "calculate"
# section of this notebook -- confirm it is defined before this cell runs.
# NOTE(review): the output name says PMBB_v2 while the other outputs in this
# notebook say PMBB_v3 -- confirm this is intended.
df_ntile_norm_wide_eur_phe <- pivot_wider(
  df_ntile_norm_eur_phe,
  id_cols = 'IID',
  names_from = 'PGS',
  names_prefix = 'ntile_',
  values_from = 'ntile'
)
dim(df_ntile_norm_wide_eur_phe)
head(df_ntile_norm_wide_eur_phe)
write.table(
  df_ntile_norm_wide_eur_phe,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.EUR_Phe_585.3.PRScs_iterations.individual_percentile.wide_form.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Reshape the EAS eGFR PRScs-iteration percentiles to one row per IID with one
# `ntile_<PGS>` column per score, then save as tab-separated text.
# NOTE(review): df_ntile_norm_eas_egfr is not created in the "calculate"
# section of this notebook -- confirm it is defined before this cell runs.
# NOTE(review): the output name says PMBB_v2 while the other outputs in this
# notebook say PMBB_v3 -- confirm this is intended.
df_ntile_norm_wide_eas_egfr <- pivot_wider(
  df_ntile_norm_eas_egfr,
  id_cols = 'IID',
  names_from = 'PGS',
  names_prefix = 'ntile_',
  values_from = 'ntile'
)
dim(df_ntile_norm_wide_eas_egfr)
head(df_ntile_norm_wide_eas_egfr)
write.table(
  df_ntile_norm_wide_eas_egfr,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.EAS_eGFR_flip.PRScs_iterations.individual_percentile.wide_form.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Reshape the EAS Phe_585.3 PRScs-iteration percentiles to one row per IID with
# one `ntile_<PGS>` column per score, then save as tab-separated text.
# NOTE(review): df_ntile_norm_eas_phe is not created in the "calculate"
# section of this notebook -- confirm it is defined before this cell runs.
# NOTE(review): the output name says PMBB_v2 while the other outputs in this
# notebook say PMBB_v3 -- confirm this is intended.
df_ntile_norm_wide_eas_phe <- pivot_wider(
  df_ntile_norm_eas_phe,
  id_cols = 'IID',
  names_from = 'PGS',
  names_prefix = 'ntile_',
  values_from = 'ntile'
)
dim(df_ntile_norm_wide_eas_phe)
head(df_ntile_norm_wide_eas_phe)
write.table(
  df_ntile_norm_wide_eas_phe,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.EAS_Phe_585.3.PRScs_iterations.individual_percentile.wide_form.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

## score lists function

In [None]:
#' List the models whose every row satisfies an equivalence criterion
#'
#' A model is kept only when `criteria == 1` on all of its rows; a model with
#' any other value (including NA) in that column is dropped. Models are
#' returned in order of first appearance in `df` (no sorting is applied).
#'
#' @param df Data frame with a `model` column and the `criteria` column.
#' @param criteria Name (single string) of the 0/1 flag column to test.
#' @param ntile If TRUE, return the names prefixed with "ntile_" (the column
#'   names used in the wide percentile tables) instead of the bare model names.
#' @return Vector of model names, or of "ntile_"-prefixed names. When no model
#'   qualifies and `ntile = TRUE`, an empty character vector is returned.
equiv_scores <- function(df, criteria, ntile = FALSE) {
    # Ensure criteria is a column in df
    if (!criteria %in% names(df)) {
        stop(paste0("Criteria column ", criteria, " not found in dataframe"))
    }
    if (!"model" %in% names(df)) {
        stop("Column model not found in dataframe")
    }

    model <- df[["model"]]
    meets <- df[[criteria]] == 1

    # Group rows by model, numbering groups in order of first appearance
    first_seen <- unique(model)
    group_id <- match(model, first_seen)
    all_meet <- vapply(split(meets, group_id), all, logical(1))

    # which() drops both FALSE and NA, so a model with a missing flag is excluded
    models <- first_seen[unname(which(all_meet))]

    if (!ntile) {
        return(models)
    }
    # paste() would turn an empty input into the single bogus name "ntile_"
    if (length(models) == 0) {
        return(character(0))
    }
    paste0("ntile_", models)
}

## make dot plot for 5 individuals PRS percentiles across scores

### all individuals

In [None]:
#' Dot plot of PGS percentiles across scores for a random sample of individuals
#'
#' Selects the scores returned by `equiv_scores()` for `criteria`, samples
#' `n_indiv` rows of `df_ntile`, and draws one facet row per sampled IID with
#' the percentile under each score.
#'
#' @param df_ntile Wide table with an `IID` column and `ntile_<score>` columns.
#' @param all_metrics_df Metrics table passed to `equiv_scores()`.
#' @param criteria Name of the flag column passed to `equiv_scores()`.
#' @param n_indiv Number of individuals to sample.
#' @param seed Seed for the random sample; NULL leaves the RNG untouched.
#'   Note that a non-NULL seed resets the global RNG state via set.seed().
#' @param title Plot title.
#' @return A ggplot object.
plot_indiv_score <- function(df_ntile, all_metrics_df, criteria = "ROPE_02", n_indiv = 5, seed = 7,
                             title = "PMBB v3 ALL CKD PGS Percentile Variation") {

    ntile_list <- equiv_scores(all_metrics_df, criteria, ntile = TRUE)
    model_list <- equiv_scores(all_metrics_df, criteria, ntile = FALSE)

    # `seed` used to be accepted but ignored, so every run drew different people
    if (!is.null(seed)) {
        set.seed(seed)
    }
    random_ntile <- sample_n(df_ntile, n_indiv) %>%
        select(IID, all_of(ntile_list))

    # long format: one row per sampled individual and score
    melt_random_ntile <- reshape2::melt(random_ntile, id = c("IID")) %>%
        mutate(variable = str_replace(variable, "ntile_", ""))

    # x-axis order follows the order returned by equiv_scores()
    melt_random_ntile$variable <- factor(melt_random_ntile$variable, levels = model_list)
    melt_random_ntile$IID <- factor(melt_random_ntile$IID)

    ggplot(data = melt_random_ntile, aes(x = variable, y = value, color = IID, group = IID)) +
        geom_point(size = 3) +
        labs(x = "Score", y = "Percentile", title = title) +
        facet_wrap(IID ~ ., nrow = n_indiv) +
        scale_color_viridis_d(option = 'H') +
        theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
              strip.background = element_blank(),
              strip.text.x = element_blank(),
              plot.margin = unit(c(1, 1, 2, 2), "cm"))
}


indiv_dot_plot <- plot_indiv_score(df_ntile_norm_wide, model_metrics_df)
indiv_dot_plot
# Relative path, matching the EUR/AFR cells (this one previously began with "/");
# the plot is passed explicitly rather than relying on last_plot().
ggsave('output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.dot_plot.png',
       plot = indiv_dot_plot,
       width = 9,
       height = 6,
       dpi = 300)

### eur individuals

In [None]:
#' Dot plot of PGS percentiles across scores for a random sample of individuals
#'
#' Selects the scores returned by `equiv_scores()` for `criteria`, samples
#' `n_indiv` rows of `df_ntile`, and draws one facet row per sampled IID with
#' the percentile under each score.
#'
#' @param df_ntile Wide table with an `IID` column and `ntile_<score>` columns.
#' @param all_metrics_df Metrics table passed to `equiv_scores()`.
#' @param criteria Name of the flag column passed to `equiv_scores()`.
#' @param n_indiv Number of individuals to sample.
#' @param seed Seed for the random sample; NULL leaves the RNG untouched.
#'   Note that a non-NULL seed resets the global RNG state via set.seed().
#' @param title Plot title.
#' @return A ggplot object.
plot_indiv_score <- function(df_ntile, all_metrics_df, criteria = "ROPE_02", n_indiv = 5, seed = 7,
                             title = "PMBB v3 EUR CKD PGS Percentile Variation") {

    ntile_list <- equiv_scores(all_metrics_df, criteria, ntile = TRUE)
    model_list <- equiv_scores(all_metrics_df, criteria, ntile = FALSE)

    # `seed` used to be accepted but ignored, so every run drew different people
    if (!is.null(seed)) {
        set.seed(seed)
    }
    random_ntile <- sample_n(df_ntile, n_indiv) %>%
        select(IID, all_of(ntile_list))

    # long format: one row per sampled individual and score
    melt_random_ntile <- reshape2::melt(random_ntile, id = c("IID")) %>%
        mutate(variable = str_replace(variable, "ntile_", ""))

    # x-axis order follows the order returned by equiv_scores()
    melt_random_ntile$variable <- factor(melt_random_ntile$variable, levels = model_list)
    melt_random_ntile$IID <- factor(melt_random_ntile$IID)

    ggplot(data = melt_random_ntile, aes(x = variable, y = value, color = IID, group = IID)) +
        geom_point(size = 3) +
        labs(x = "Score", y = "Percentile", title = title) +
        facet_wrap(IID ~ ., nrow = n_indiv) +
        scale_color_viridis_d(option = 'H') +
        theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
              strip.background = element_blank(),
              strip.text.x = element_blank(),
              plot.margin = unit(c(1, 1, 2, 2), "cm"))
}


indiv_dot_plot_eur <- plot_indiv_score(df_ntile_norm_wide_eur, model_metrics_df_eur)
indiv_dot_plot_eur
# pass the plot explicitly rather than relying on last_plot()
ggsave('output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.dot_plot.png',
       plot = indiv_dot_plot_eur,
       width = 9,
       height = 6,
       dpi = 300)

### afr

In [None]:
#' Dot plot of PGS percentiles across scores for a random sample of individuals
#'
#' Selects the scores returned by `equiv_scores()` for `criteria`, samples
#' `n_indiv` rows of `df_ntile`, and draws one facet row per sampled IID with
#' the percentile under each score.
#'
#' @param df_ntile Wide table with an `IID` column and `ntile_<score>` columns.
#' @param all_metrics_df Metrics table passed to `equiv_scores()`.
#' @param criteria Name of the flag column passed to `equiv_scores()`.
#' @param n_indiv Number of individuals to sample.
#' @param seed Seed for the random sample; NULL leaves the RNG untouched.
#'   Note that a non-NULL seed resets the global RNG state via set.seed().
#' @param title Plot title.
#' @return A ggplot object.
plot_indiv_score <- function(df_ntile, all_metrics_df, criteria = "ROPE_02", n_indiv = 5, seed = 7,
                             title = "PMBB v3 AFR CKD PGS Percentile Variation") {

    ntile_list <- equiv_scores(all_metrics_df, criteria, ntile = TRUE)
    model_list <- equiv_scores(all_metrics_df, criteria, ntile = FALSE)

    # `seed` used to be accepted but ignored, so every run drew different people
    if (!is.null(seed)) {
        set.seed(seed)
    }
    random_ntile <- sample_n(df_ntile, n_indiv) %>%
        select(IID, all_of(ntile_list))

    # long format: one row per sampled individual and score
    melt_random_ntile <- reshape2::melt(random_ntile, id = c("IID")) %>%
        mutate(variable = str_replace(variable, "ntile_", ""))

    # x-axis order follows the order returned by equiv_scores()
    melt_random_ntile$variable <- factor(melt_random_ntile$variable, levels = model_list)
    melt_random_ntile$IID <- factor(melt_random_ntile$IID)

    ggplot(data = melt_random_ntile, aes(x = variable, y = value, color = IID, group = IID)) +
        geom_point(size = 3) +
        labs(x = "Score", y = "Percentile", title = title) +
        facet_wrap(IID ~ ., nrow = n_indiv) +
        scale_color_viridis_d(option = 'H') +
        theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
              strip.background = element_blank(),
              strip.text.x = element_blank(),
              plot.margin = unit(c(1, 1, 2, 2), "cm"))
}


indiv_dot_plot_afr <- plot_indiv_score(df_ntile_norm_wide_afr, model_metrics_df_afr)
indiv_dot_plot_afr
# pass the plot explicitly rather than relying on last_plot()
ggsave('output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.dot_plot.png',
       plot = indiv_dot_plot_afr,
       width = 9,
       height = 6,
       dpi = 300)

## reformat df for percentile stats function

In [None]:
# Distinct rows of IID plus the listed phenotype/covariate columns
all_score_pheno_covar <- distinct(
  select(all_score, IID, SEX, AGE, CKD, MostSimilarPop)
)

# Wide table of Z_norm2 values: one row per IID, one column per PGS name
df_score_wide <- pivot_wider(
  df_ntile_norm,
  id_cols = 'IID',
  names_from = 'PGS',
  values_from = 'Z_norm2'
)

# Percentile columns + covariates + Z_norm2 columns, matched on IID
# (inner joins: an IID missing from any of the three tables is dropped)
df_ntile_norm_score_wide_pheno_covar <- df_ntile_norm_wide %>%
  inner_join(all_score_pheno_covar, by = 'IID') %>%
  inner_join(df_score_wide, by = 'IID')
dim(df_ntile_norm_score_wide_pheno_covar)
head(df_ntile_norm_score_wide_pheno_covar)

## calculate percentile stats across all individuals

In [None]:
#' Build a long table of scores and percentiles for the equivalent models
#'
#' Keeps IID, the covariates, and the score / percentile columns of the models
#' returned by `equiv_scores()`, adds `CKD_status` (CKD as a factor), then
#' pivots the score columns and the percentile columns to long form.
#' The two pivots are independent, so the result holds every combination of
#' `PGS_method` and `ntile_method` for each individual: each percentile appears
#' once per score, and vice versa.
#'
#' @param df_ntile_norm Wide table with IID, AGE, SEX, MostSimilarPop, CKD,
#'   one column per model name and one `ntile_<model>` column per model.
#' @param all_metrics_df Metrics table passed to `equiv_scores()`.
#' @param criteria Name of the flag column passed to `equiv_scores()`.
#' @return Long data frame with PGS_method/PGS and ntile_method/ntile columns.
make_sumscores_pivot <- function(df_ntile_norm, all_metrics_df = model_metrics_df,  criteria = "ROPE_02") {
    ntile_list <- equiv_scores(all_metrics_df, criteria, ntile = TRUE)
    model_list <- equiv_scores(all_metrics_df, criteria, ntile = FALSE)
    pheno <- "CKD_status"

    df_ntile_norm %>%
        select(IID, AGE, SEX, MostSimilarPop, CKD, all_of(ntile_list), all_of(model_list)) %>%
        mutate(!!pheno := base::as.factor(CKD)) %>%
        # pivot exactly the selected score columns, whatever prefix their names have
        pivot_longer(cols = all_of(model_list), names_to = "PGS_method", values_to = "PGS") %>%
        pivot_longer(cols = all_of(ntile_list), names_to = "ntile_method", values_to = "ntile")
}

sumscores_pivot <- make_sumscores_pivot(df_ntile_norm_score_wide_pheno_covar)

# In sumscores_pivot each percentile is repeated once per PGS_method row.
# Summarising it directly would count every percentile several times, which
# inflates `length` and shrinks sd / se / ci. Reduce to one row per individual
# and score before computing the per-individual statistics.
sum_ntile <- sumscores_pivot %>%
    distinct(IID, ntile_method, ntile) %>%
    desc_statby(measure.var = "ntile", grps = c("IID"))

In [None]:
# Preview the long score/percentile table and report its row count
head(sumscores_pivot)
nrow(sumscores_pivot)

In [None]:
# Preview the per-IID percentile summary statistics and report the row count
head(sum_ntile)
nrow(sum_ntile)

In [None]:
# Save the long score/percentile table as gzip-compressed tab-separated text
write.table(
  sumscores_pivot,
  file = gzfile('output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.pivot.txt.gz'),
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Save the per-IID percentile summary statistics as tab-separated text
write.table(
  sum_ntile,
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.sumstats.individual_level.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

## make "all individual percentile plots" aggregated by mean, sd, and cv

In [None]:
# Density plots of three per-individual summary columns of sum_ntile
sum_metrics <- c("mean", "sd", "cv")
metric_labels <- c("mean" = "Mean", "sd" = "Standard Deviation", "cv" = "Coefficient of Variation")
titles <- c("mean" = "Distribution of Mean PGS Percentiles per Individual",
            "sd" = "Distribution of PGS Percentile Standard Deviation per Individual",
            "cv" = "Distribution of PGS Percentile Coefficient of Variation per Individual")

# aes() is evaluated when a plot is drawn, not when it is created. Building the
# plots in a for loop made all three look up the loop variable after the loop
# had finished (metric == "cv"), so every plot drew the cv column. Building
# each plot inside a function gives it its own fixed copy of `metric`.
plot_list <- lapply(sum_metrics, function(metric) {
    force(metric)
    ggplot(sum_ntile, aes(x = .data[[metric]])) +
        geom_density(alpha = .7, color = "#B24745FF", fill = "#B24745FF") +
        labs(title = titles[[metric]],
             x = paste("PMBB v3 PGS Percentile", metric_labels[[metric]]),
             y = "Density") +
        theme(legend.position = "none")
})

# Create a named list of plots
named_plot_list <- setNames(plot_list, sum_metrics)
plot_list <- named_plot_list

# These names are kept because the following cells display and save the plots
# by them. NOTE(review): they mask base::mean / stats::sd as variables; calls
# such as mean(x) still resolve to the functions.
mean <- named_plot_list[["mean"]]
sd <- named_plot_list[["sd"]]
cv <- named_plot_list[["cv"]]

In [None]:
# Display the mean-percentile density plot, then save that same object
# (explicit `plot` so the file does not depend on ggplot2's last-plot state)
mean
ggsave('output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_mean_percentile.density_plot.png',
       plot = mean)

In [None]:
# Display the standard-deviation density plot, then save that same object
# (explicit `plot` so the file does not depend on ggplot2's last-plot state)
sd
ggsave('output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_stdev_percentile.density_plot.png',
       plot = sd)

In [None]:
# Display the coefficient-of-variation density plot, then save that same object
# (explicit `plot` so the file does not depend on ggplot2's last-plot state)
cv
ggsave('output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_coefficient_variation_percentile.density_plot.png',
       plot = cv)

## compute avg stats across all people and scores

In [None]:
sum_metrics <- c("mean", "sd", "cv")

# For each summary column of sum_ntile: bootstrap the median (1000 resamples)
# and take a 95% normal-approximation confidence interval around it.
# The resampling is not seeded, so the interval bounds vary between runs.
avg_stats <- bind_rows(
  lapply(sum_metrics, function(met) {
    boot_med <- simpleboot::one.boot(sum_ntile[[met]], median, R = 1000)
    ci_result <- boot::boot.ci(boot_med, conf = 0.95, type = "norm")

    # ci_result$normal holds (conf level, lower, upper);
    # ci_result$t0 is the median of the observed data
    data.frame(
      Metric = met,
      Lower = ci_result$normal[[2]],
      Upper = ci_result$normal[[3]],
      median = ci_result$t0
    )
  })
)


write.table(
  avg_stats,
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.sumstats.all_individuals_scores.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

head(avg_stats)

## make plots showing range of individual-level discordance in percentile classification

### create score list function (if needed)

In [None]:
#' List the models whose every row satisfies an equivalence criterion
#'
#' A model is kept only when `criteria == 1` on all of its rows; a model with
#' any other value (including NA) in that column is dropped. Models are
#' returned in order of first appearance in `df` (no sorting is applied).
#'
#' @param df Data frame with a `model` column and the `criteria` column.
#' @param criteria Name (single string) of the 0/1 flag column to test.
#' @param ntile If TRUE, return the names prefixed with "ntile_" (the column
#'   names used in the wide percentile tables) instead of the bare model names.
#' @return Vector of model names, or of "ntile_"-prefixed names. When no model
#'   qualifies and `ntile = TRUE`, an empty character vector is returned.
equiv_scores <- function(df, criteria, ntile = FALSE) {
  # Ensure criteria is a column in df
  if (!criteria %in% names(df)) {
    stop(paste0("Criteria column ", criteria, " not found in dataframe"))
  }
  if (!"model" %in% names(df)) {
    stop("Column model not found in dataframe")
  }

  model <- df[["model"]]
  meets <- df[[criteria]] == 1

  # Group rows by model, numbering groups in order of first appearance
  first_seen <- unique(model)
  group_id <- match(model, first_seen)
  all_meet <- vapply(split(meets, group_id), all, logical(1))

  # which() drops both FALSE and NA, so a model with a missing flag is excluded
  models <- first_seen[unname(which(all_meet))]

  if (!ntile) {
    return(models)
  }
  # paste() would turn an empty input into the single bogus name "ntile_"
  if (length(models) == 0) {
    return(character(0))
  }
  paste0("ntile_", models)
}

### make inputs

#### all individuals

In [None]:
metrics <- c("ROPE_02")
ntile_list <- equiv_scores(model_metrics_df, metrics, ntile = TRUE)
model_list <- equiv_scores(model_metrics_df, metrics, ntile = FALSE)

# One row per IID: highest and lowest percentile across the selected scores,
# and the gap between them.
ntile_df_plot <- df_ntile_norm_wide %>%
  select(IID, all_of(ntile_list)) %>% # only the selected scores' percentile columns
  pivot_longer(cols = -IID) %>%
  mutate(name = str_replace(name, "ntile_", "")) %>%
  filter(name %in% model_list) %>%
  mutate(max_score = max(value), min_score = min(value), .by = IID) %>%
  select(IID, max_score, min_score) %>%
  mutate(score_range = max_score - min_score) %>%
  distinct()

# Discordance class per IID; case_when takes the first matching branch, so the
# classes are mutually exclusive (most extreme first).
ntile_plot_groups <- ntile_df_plot %>%
  mutate(
    risk = case_when(
      max_score > 98 & min_score < 2 ~ "Above 98% and Below 2%",
      max_score > 95 & min_score < 5 ~ "Above 95% and Below 5%",
      max_score > 80 & min_score < 20 ~ "Above 80% and Below 20%",
      .default = "Intermediate agreement"
    ),
    risk = forcats::fct_relevel(
      risk,
      c("Above 98% and Below 2%",
        "Above 95% and Below 5%",
        "Above 80% and Below 20%",
        "Intermediate agreement")
    )
  )

head(ntile_plot_groups)
unique(ntile_plot_groups$risk)
# number of individuals whose percentiles span at least 98 points
nrow(filter(ntile_df_plot, score_range >= 98))

#### eur

In [None]:
metrics <- c("ROPE_02")
ntile_list <- equiv_scores(model_metrics_df_eur, metrics, ntile = TRUE)
model_list <- equiv_scores(model_metrics_df_eur, metrics, ntile = FALSE)

# One row per IID: highest and lowest percentile across the selected scores,
# and the gap between them.
ntile_df_plot_eur <- df_ntile_norm_wide_eur %>%
  select(IID, all_of(ntile_list)) %>% # only the selected scores' percentile columns
  pivot_longer(cols = -IID) %>%
  mutate(name = str_replace(name, "ntile_", "")) %>%
  filter(name %in% model_list) %>%
  mutate(max_score = max(value), min_score = min(value), .by = IID) %>%
  select(IID, max_score, min_score) %>%
  mutate(score_range = max_score - min_score) %>%
  distinct()

# Discordance class per IID; case_when takes the first matching branch, so the
# classes are mutually exclusive (most extreme first).
ntile_plot_groups_eur <- ntile_df_plot_eur %>%
  mutate(
    risk = case_when(
      max_score > 98 & min_score < 2 ~ "Above 98% and Below 2%",
      max_score > 95 & min_score < 5 ~ "Above 95% and Below 5%",
      max_score > 80 & min_score < 20 ~ "Above 80% and Below 20%",
      .default = "Intermediate agreement"
    ),
    risk = forcats::fct_relevel(
      risk,
      c("Above 98% and Below 2%",
        "Above 95% and Below 5%",
        "Above 80% and Below 20%",
        "Intermediate agreement")
    )
  )

head(ntile_plot_groups_eur)
unique(ntile_plot_groups_eur$risk)
# number of individuals whose percentiles span at least 98 points
nrow(filter(ntile_df_plot_eur, score_range >= 98))

#### afr

In [None]:
metrics <- c("ROPE_02")
ntile_list <- equiv_scores(model_metrics_df_afr, metrics, ntile = TRUE)
model_list <- equiv_scores(model_metrics_df_afr, metrics, ntile = FALSE)

# One row per IID: highest and lowest percentile across the selected scores,
# and the gap between them.
ntile_df_plot_afr <- df_ntile_norm_wide_afr %>%
  select(IID, all_of(ntile_list)) %>% # only the selected scores' percentile columns
  pivot_longer(cols = -IID) %>%
  mutate(name = str_replace(name, "ntile_", "")) %>%
  filter(name %in% model_list) %>%
  mutate(max_score = max(value), min_score = min(value), .by = IID) %>%
  select(IID, max_score, min_score) %>%
  mutate(score_range = max_score - min_score) %>%
  distinct()

# Discordance class per IID; case_when takes the first matching branch, so the
# classes are mutually exclusive (most extreme first).
ntile_plot_groups_afr <- ntile_df_plot_afr %>%
  mutate(
    risk = case_when(
      max_score > 98 & min_score < 2 ~ "Above 98% and Below 2%",
      max_score > 95 & min_score < 5 ~ "Above 95% and Below 5%",
      max_score > 80 & min_score < 20 ~ "Above 80% and Below 20%",
      .default = "Intermediate agreement"
    ),
    risk = forcats::fct_relevel(
      risk,
      c("Above 98% and Below 2%",
        "Above 95% and Below 5%",
        "Above 80% and Below 20%",
        "Intermediate agreement")
    )
  )

head(ntile_plot_groups_afr)
unique(ntile_plot_groups_afr$risk)
# number of individuals whose percentiles span at least 98 points
nrow(filter(ntile_df_plot_afr, score_range >= 98))

### make pretty plot

#### all individuals

In [None]:
# Scatter of each individual's maximum vs minimum percentile, filled by
# discordance class. Dashed line: max == min; dotted lines: the 5/20 and 80/95
# cut-offs.
ggplot(ntile_plot_groups, aes(x = max_score, y = min_score)) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_point(aes(fill = risk), size = 2, shape = 21) +
  geom_hline(yintercept = c(5, 20), linetype = "dotted") +
  geom_vline(xintercept = c(80, 95), linetype = "dotted") +
  scale_x_continuous(limits = c(0, 100), labels = scales::percent_format(scale = 1)) +
  scale_y_continuous(limits = c(0, 100), labels = scales::percent_format(scale = 1)) +
  labs(
    title = "PMBB v3 ALL CKD PGS Percentile Consistency Across Models",
    x = "Maximum Score Percentile",
    y = "Minimum Score Percentile",
    fill = "Risk Groups"
  ) +
  guides(fill = guide_legend(override.aes = list(alpha = 1, size = 3))) +
  coord_fixed() +
  scale_fill_manual(
    values = c(
      "Above 98% and Below 2%" = "orange",
      "Above 95% and Below 5%" = "darkorchid1",
      "Above 80% and Below 20%" = "skyblue",
      "Intermediate agreement" = "pink"
    )
  )
# no plot argument: ggsave() writes ggplot2's last plot
ggsave('output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_variation_range.png')

#### eur

In [None]:
# Scatter of each EUR individual's maximum vs minimum percentile, filled by
# discordance class. Dashed line: max == min; dotted lines: the 5/20 and 80/95
# cut-offs.
ggplot(ntile_plot_groups_eur, aes(x = max_score, y = min_score)) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_point(aes(fill = risk), size = 2, shape = 21) +
  geom_hline(yintercept = c(5, 20), linetype = "dotted") +
  geom_vline(xintercept = c(80, 95), linetype = "dotted") +
  scale_x_continuous(limits = c(0, 100), labels = scales::percent_format(scale = 1)) +
  scale_y_continuous(limits = c(0, 100), labels = scales::percent_format(scale = 1)) +
  labs(
    title = "PMBB v3 EUR CKD PGS Percentile Consistency Across Models",
    x = "Maximum Score Percentile",
    y = "Minimum Score Percentile",
    fill = "Risk Groups"
  ) +
  guides(fill = guide_legend(override.aes = list(alpha = 1, size = 3))) +
  coord_fixed() +
  scale_fill_manual(
    values = c(
      "Above 98% and Below 2%" = "orange",
      "Above 95% and Below 5%" = "darkorchid1",
      "Above 80% and Below 20%" = "skyblue",
      "Intermediate agreement" = "pink"
    )
  )
# no plot argument: ggsave() writes ggplot2's last plot
ggsave('output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_variation_range.png')

#### afr

In [None]:
# Scatter of each AFR individual's maximum vs minimum percentile, filled by
# discordance class. Dashed line: max == min; dotted lines: the 5/20 and 80/95
# cut-offs.
ggplot(ntile_plot_groups_afr, aes(x = max_score, y = min_score)) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_point(aes(fill = risk), size = 2, shape = 21) +
  geom_hline(yintercept = c(5, 20), linetype = "dotted") +
  geom_vline(xintercept = c(80, 95), linetype = "dotted") +
  scale_x_continuous(limits = c(0, 100), labels = scales::percent_format(scale = 1)) +
  scale_y_continuous(limits = c(0, 100), labels = scales::percent_format(scale = 1)) +
  labs(
    title = "PMBB v3 AFR CKD PGS Percentile Consistency Across Models",
    x = "Maximum Score Percentile",
    y = "Minimum Score Percentile",
    fill = "Risk Groups"
  ) +
  guides(fill = guide_legend(override.aes = list(alpha = 1, size = 3))) +
  coord_fixed() +
  scale_fill_manual(
    values = c(
      "Above 98% and Below 2%" = "orange",
      "Above 95% and Below 5%" = "darkorchid1",
      "Above 80% and Below 20%" = "skyblue",
      "Intermediate agreement" = "pink"
    )
  )
# no plot argument: ggsave() writes ggplot2's last plot
ggsave('output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_variation_range.png')

### compute summary stats for these differences

#### all

In [None]:
# Counts and percentages of individuals by percentile discordance.
# `wide` counts everyone beyond the 80/20 cut-offs, so it includes the
# `extreme` (95/5) individuals; `intermediate` is the complement of `wide`.
risk_stats <- summarise(
  ntile_df_plot,
  total = n(),
  extreme = sum(max_score > 95 & min_score < 5),
  wide = sum(max_score > 80 & min_score < 20),
  intermediate = sum(max_score <= 80 | min_score >= 20)
)
risk_stats <- mutate(
  risk_stats,
  extreme_pct = extreme / total * 100,
  wide_pct = wide / total * 100,
  intermediate_pct = intermediate / total * 100
)

# Legend-style labels carrying the percentages (one decimal place)
risk_labels <- c(
  "Above 95% and Below 5%" = sprintf("Above 95%% and Below 5%% (%.1f%% of participants)", risk_stats$extreme_pct),
  "Above 80% and Below 20%" = sprintf("Above 80%% and Below 20%% (%.1f%% of participants)", risk_stats$wide_pct),
  "Intermediate agreement" = sprintf("Intermediate agreement (%.1f%% of participants)", risk_stats$intermediate_pct)
)
head(risk_stats)
write.table(
  risk_stats,
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_variation_range.sumstats.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### eur

In [None]:
# Counts and percentages of EUR individuals by percentile discordance.
# `wide` counts everyone beyond the 80/20 cut-offs, so it includes the
# `extreme` (95/5) individuals; `intermediate` is the complement of `wide`.
risk_stats_eur <- summarise(
  ntile_df_plot_eur,
  total = n(),
  extreme = sum(max_score > 95 & min_score < 5),
  wide = sum(max_score > 80 & min_score < 20),
  intermediate = sum(max_score <= 80 | min_score >= 20)
)
risk_stats_eur <- mutate(
  risk_stats_eur,
  extreme_pct = extreme / total * 100,
  wide_pct = wide / total * 100,
  intermediate_pct = intermediate / total * 100
)

# Legend-style labels carrying the percentages (one decimal place);
# this reassigns the shared `risk_labels` variable
risk_labels <- c(
  "Above 95% and Below 5%" = sprintf("Above 95%% and Below 5%% (%.1f%% of participants)", risk_stats_eur$extreme_pct),
  "Above 80% and Below 20%" = sprintf("Above 80%% and Below 20%% (%.1f%% of participants)", risk_stats_eur$wide_pct),
  "Intermediate agreement" = sprintf("Intermediate agreement (%.1f%% of participants)", risk_stats_eur$intermediate_pct)
)
head(risk_stats_eur)
write.table(
  risk_stats_eur,
  file = 'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_variation_range.sumstats.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### afr

In [None]:
# Counts and percentages of AFR individuals by percentile discordance.
# `wide` counts everyone beyond the 80/20 cut-offs, so it includes the
# `extreme` (95/5) individuals; `intermediate` is the complement of `wide`.
risk_stats_afr <- summarise(
  ntile_df_plot_afr,
  total = n(),
  extreme = sum(max_score > 95 & min_score < 5),
  wide = sum(max_score > 80 & min_score < 20),
  intermediate = sum(max_score <= 80 | min_score >= 20)
)
risk_stats_afr <- mutate(
  risk_stats_afr,
  extreme_pct = extreme / total * 100,
  wide_pct = wide / total * 100,
  intermediate_pct = intermediate / total * 100
)

# Legend-style labels carrying the percentages (one decimal place);
# this reassigns the shared `risk_labels` variable
risk_labels <- c(
  "Above 95% and Below 5%" = sprintf("Above 95%% and Below 5%% (%.1f%% of participants)", risk_stats_afr$extreme_pct),
  "Above 80% and Below 20%" = sprintf("Above 80%% and Below 20%% (%.1f%% of participants)", risk_stats_afr$wide_pct),
  "Intermediate agreement" = sprintf("Intermediate agreement (%.1f%% of participants)", risk_stats_afr$intermediate_pct)
)
head(risk_stats_afr)
write.table(
  risk_stats_afr,
  file = 'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_variation_range.sumstats.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

# identify individuals with 95% confidence

## compute mean PGS

In [None]:
# Per-IID mean of `ntile` over all rows of df_ntile_norm_afr_egfr.
# The column is named MEAN_PGS but holds an average of ntile values.
df_ntile_norm_afr_egfr_mean <- df_ntile_norm_afr_egfr %>%
  group_by(IID) %>%
  summarise(MEAN_PGS = mean(ntile))
summary(df_ntile_norm_afr_egfr_mean[["MEAN_PGS"]])

In [None]:
# Per-IID mean of `ntile` over all rows of df_ntile_norm_eur_egfr.
# The column is named MEAN_PGS but holds an average of ntile values.
df_ntile_norm_eur_egfr_mean <- df_ntile_norm_eur_egfr %>%
  group_by(IID) %>%
  summarise(MEAN_PGS = mean(ntile))
summary(df_ntile_norm_eur_egfr_mean[["MEAN_PGS"]])

In [None]:
# Per-IID mean of `ntile` over all rows of df_ntile_norm_meta_egfr.
# The column is named MEAN_PGS but holds an average of ntile values.
df_ntile_norm_meta_egfr_mean <- df_ntile_norm_meta_egfr %>%
  group_by(IID) %>%
  summarise(MEAN_PGS = mean(ntile))
summary(df_ntile_norm_meta_egfr_mean[["MEAN_PGS"]])

## get N scores in top 2% (ntile >= 98) per person

In [None]:
# For each IID, count the rows of df_ntile_norm_afr_egfr with ntile >= 98.
# IIDs without any such row are absent from the result (not recorded as 0).
df_ntile_norm_afr_egfr_count <- df_ntile_norm_afr_egfr %>%
  filter(ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n())
summary(df_ntile_norm_afr_egfr_count[["COUNT_PGS"]])

In [None]:
# For each IID, count the rows of df_ntile_norm_eur_egfr with ntile >= 98.
# IIDs without any such row are absent from the result (not recorded as 0).
df_ntile_norm_eur_egfr_count <- df_ntile_norm_eur_egfr %>%
  filter(ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n())
summary(df_ntile_norm_eur_egfr_count[["COUNT_PGS"]])

In [None]:
# For each IID, count the rows of df_ntile_norm_meta_egfr with ntile >= 98.
# IIDs without any such row are absent from the result (not recorded as 0).
df_ntile_norm_meta_egfr_count <- df_ntile_norm_meta_egfr %>%
  filter(ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n())
summary(df_ntile_norm_meta_egfr_count[["COUNT_PGS"]])

In [None]:
# For each IID, count the rows of df_ntile_norm_afr_phe with ntile >= 98.
# IIDs without any such row are absent from the result (not recorded as 0).
df_ntile_norm_afr_phe_count <- df_ntile_norm_afr_phe %>%
  filter(ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n())
summary(df_ntile_norm_afr_phe_count[["COUNT_PGS"]])

In [None]:
# For each IID, count the rows of df_ntile_norm_amr_egfr with ntile >= 98.
# IIDs without any such row are absent from the result (not recorded as 0).
df_ntile_norm_amr_egfr_count <- df_ntile_norm_amr_egfr %>%
  filter(ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n())
summary(df_ntile_norm_amr_egfr_count[["COUNT_PGS"]])

In [None]:
# For each IID, count the rows of df_ntile_norm_eur_phe with ntile >= 98.
# IIDs without any such row are absent from the result (not recorded as 0).
df_ntile_norm_eur_phe_count <- df_ntile_norm_eur_phe %>%
  filter(ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n())
summary(df_ntile_norm_eur_phe_count[["COUNT_PGS"]])

In [None]:
# For each IID, count the rows of df_ntile_norm_eas_egfr with ntile >= 98.
# IIDs without any such row are absent from the result (not recorded as 0).
df_ntile_norm_eas_egfr_count <- df_ntile_norm_eas_egfr %>%
  filter(ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n())
summary(df_ntile_norm_eas_egfr_count[["COUNT_PGS"]])

In [None]:
# For each IID, count the rows of df_ntile_norm_eas_phe with ntile >= 98.
# IIDs without any such row are absent from the result (not recorded as 0).
df_ntile_norm_eas_phe_count <- df_ntile_norm_eas_phe %>%
  filter(ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n())
summary(df_ntile_norm_eas_phe_count[["COUNT_PGS"]])

In [None]:
# For each IID, count the rows of df_ntile_norm_meta_phe with ntile >= 98.
# IIDs without any such row are absent from the result (not recorded as 0).
df_ntile_norm_meta_phe_count <- df_ntile_norm_meta_phe %>%
  filter(ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n())
summary(df_ntile_norm_meta_phe_count[["COUNT_PGS"]])

## get N scores in bottom 2%

In [None]:
# For each IID, count the rows of df_ntile_norm_afr_egfr with ntile <= 2.
# IIDs without any such row are absent from the result (not recorded as 0).
df_ntile_norm_afr_egfr_count_low <- df_ntile_norm_afr_egfr %>%
  filter(ntile <= 2) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n())
summary(df_ntile_norm_afr_egfr_count_low[["COUNT_PGS"]])

In [None]:
# For each IID, count the rows of df_ntile_norm_eur_egfr with ntile <= 2.
# IIDs without any such row are absent from the result (not recorded as 0).
df_ntile_norm_eur_egfr_count_low <- df_ntile_norm_eur_egfr %>%
  filter(ntile <= 2) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n())
summary(df_ntile_norm_eur_egfr_count_low[["COUNT_PGS"]])

In [None]:
# For each IID, count the rows of df_ntile_norm_meta_egfr with ntile <= 2.
# IIDs without any such row are absent from the result (not recorded as 0).
df_ntile_norm_meta_egfr_count_low <- df_ntile_norm_meta_egfr %>%
  filter(ntile <= 2) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n())
summary(df_ntile_norm_meta_egfr_count_low[["COUNT_PGS"]])

## create eur and afr dfs

In [None]:
# Rows of pop_sub whose MostSimilarPop is 'AFR', and how many there are.
afr <- filter(pop_sub, MostSimilarPop == 'AFR')
nrow(afr)

In [None]:
# Rows of pop_sub whose MostSimilarPop is 'EUR', and how many there are.
eur <- filter(pop_sub, MostSimilarPop == 'EUR')
nrow(eur)

## see how many individuals meet each count threshold (>= 95, 90, 80, 70, 50)

### afr egfr

#### top 2%

In [None]:
# IIDs with COUNT_PGS >= 90 in the AFR eGFR top-2% counts; print the total,
# then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_afr_egfr_90 <- df_ntile_norm_afr_egfr_count %>%
  filter(COUNT_PGS >= 90)
nrow(df_ntile_norm_afr_egfr_90)
sum(df_ntile_norm_afr_egfr_90$IID %in% eur$IID)
sum(df_ntile_norm_afr_egfr_90$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 80 in the AFR eGFR top-2% counts; print the total,
# then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_afr_egfr_80 <- df_ntile_norm_afr_egfr_count %>%
  filter(COUNT_PGS >= 80)
nrow(df_ntile_norm_afr_egfr_80)
sum(df_ntile_norm_afr_egfr_80$IID %in% eur$IID)
sum(df_ntile_norm_afr_egfr_80$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 70 in the AFR eGFR top-2% counts; print the total,
# then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_afr_egfr_70 <- df_ntile_norm_afr_egfr_count %>%
  filter(COUNT_PGS >= 70)
nrow(df_ntile_norm_afr_egfr_70)
sum(df_ntile_norm_afr_egfr_70$IID %in% eur$IID)
sum(df_ntile_norm_afr_egfr_70$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 50 in the AFR eGFR top-2% counts; print the total,
# then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_afr_egfr_50 <- df_ntile_norm_afr_egfr_count %>%
  filter(COUNT_PGS >= 50)
nrow(df_ntile_norm_afr_egfr_50)
sum(df_ntile_norm_afr_egfr_50$IID %in% eur$IID)
sum(df_ntile_norm_afr_egfr_50$IID %in% afr$IID)

#### bottom 2%

In [None]:
# IIDs with COUNT_PGS >= 90 in the AFR eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_afr_egfr_90_low <- df_ntile_norm_afr_egfr_count_low %>%
  filter(COUNT_PGS >= 90)
nrow(df_ntile_norm_afr_egfr_90_low)
sum(df_ntile_norm_afr_egfr_90_low$IID %in% eur$IID)
sum(df_ntile_norm_afr_egfr_90_low$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 80 in the AFR eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_afr_egfr_80_low <- df_ntile_norm_afr_egfr_count_low %>%
  filter(COUNT_PGS >= 80)
nrow(df_ntile_norm_afr_egfr_80_low)
sum(df_ntile_norm_afr_egfr_80_low$IID %in% eur$IID)
sum(df_ntile_norm_afr_egfr_80_low$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 70 in the AFR eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_afr_egfr_70_low <- df_ntile_norm_afr_egfr_count_low %>%
  filter(COUNT_PGS >= 70)
nrow(df_ntile_norm_afr_egfr_70_low)
sum(df_ntile_norm_afr_egfr_70_low$IID %in% eur$IID)
sum(df_ntile_norm_afr_egfr_70_low$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 50 in the AFR eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_afr_egfr_50_low <- df_ntile_norm_afr_egfr_count_low %>%
  filter(COUNT_PGS >= 50)
nrow(df_ntile_norm_afr_egfr_50_low)
sum(df_ntile_norm_afr_egfr_50_low$IID %in% eur$IID)
sum(df_ntile_norm_afr_egfr_50_low$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 70 in df_ntile_norm_afr_phe_count (the ntile >= 98
# counts, even though this cell sits under the "bottom 2%" heading); print
# the total, then the members of the `eur` and `afr` population tables.
df_ntile_norm_afr_phe_70 <- df_ntile_norm_afr_phe_count %>%
  filter(COUNT_PGS >= 70)
nrow(df_ntile_norm_afr_phe_70)
sum(df_ntile_norm_afr_phe_70$IID %in% eur$IID)
sum(df_ntile_norm_afr_phe_70$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 50 in df_ntile_norm_afr_phe_count (the ntile >= 98
# counts, even though this cell sits under the "bottom 2%" heading); print
# the total, then the members of the `eur` and `afr` population tables.
df_ntile_norm_afr_phe_50 <- df_ntile_norm_afr_phe_count %>%
  filter(COUNT_PGS >= 50)
nrow(df_ntile_norm_afr_phe_50)
sum(df_ntile_norm_afr_phe_50$IID %in% eur$IID)
sum(df_ntile_norm_afr_phe_50$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 50 in df_ntile_norm_amr_egfr_count (the ntile >= 98
# counts, even though this cell sits under the "bottom 2%" heading); print
# the total, then the members of the `eur` and `afr` population tables.
df_ntile_norm_amr_egfr_50 <- df_ntile_norm_amr_egfr_count %>%
  filter(COUNT_PGS >= 50)
nrow(df_ntile_norm_amr_egfr_50)
sum(df_ntile_norm_amr_egfr_50$IID %in% eur$IID)
sum(df_ntile_norm_amr_egfr_50$IID %in% afr$IID)

### eur egfr

#### top 2%

In [None]:
# IIDs with COUNT_PGS >= 95 in the EUR eGFR top-2% counts; print the total,
# then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_eur_egfr_95 <- df_ntile_norm_eur_egfr_count %>%
  filter(COUNT_PGS >= 95)
nrow(df_ntile_norm_eur_egfr_95)
sum(df_ntile_norm_eur_egfr_95$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_95$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 90 in the EUR eGFR top-2% counts; print the total,
# then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_eur_egfr_90 <- df_ntile_norm_eur_egfr_count %>%
  filter(COUNT_PGS >= 90)
nrow(df_ntile_norm_eur_egfr_90)
sum(df_ntile_norm_eur_egfr_90$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_90$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 80 in the EUR eGFR top-2% counts; print the total,
# then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_eur_egfr_80 <- df_ntile_norm_eur_egfr_count %>%
  filter(COUNT_PGS >= 80)
nrow(df_ntile_norm_eur_egfr_80)
sum(df_ntile_norm_eur_egfr_80$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_80$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 70 in the EUR eGFR top-2% counts; print the total,
# then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_eur_egfr_70 <- df_ntile_norm_eur_egfr_count %>%
  filter(COUNT_PGS >= 70)
nrow(df_ntile_norm_eur_egfr_70)
sum(df_ntile_norm_eur_egfr_70$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_70$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 50 in the EUR eGFR top-2% counts; print the total,
# then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_eur_egfr_50 <- df_ntile_norm_eur_egfr_count %>%
  filter(COUNT_PGS >= 50)
nrow(df_ntile_norm_eur_egfr_50)
sum(df_ntile_norm_eur_egfr_50$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_50$IID %in% afr$IID)

#### bottom 2%

In [None]:
# IIDs with COUNT_PGS >= 95 in the EUR eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_eur_egfr_95_low <- df_ntile_norm_eur_egfr_count_low %>%
  filter(COUNT_PGS >= 95)
nrow(df_ntile_norm_eur_egfr_95_low)
sum(df_ntile_norm_eur_egfr_95_low$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_95_low$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 90 in the EUR eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_eur_egfr_90_low <- df_ntile_norm_eur_egfr_count_low %>%
  filter(COUNT_PGS >= 90)
nrow(df_ntile_norm_eur_egfr_90_low)
sum(df_ntile_norm_eur_egfr_90_low$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_90_low$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 80 in the EUR eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_eur_egfr_80_low <- df_ntile_norm_eur_egfr_count_low %>%
  filter(COUNT_PGS >= 80)
nrow(df_ntile_norm_eur_egfr_80_low)
sum(df_ntile_norm_eur_egfr_80_low$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_80_low$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 70 in the EUR eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_eur_egfr_70_low <- df_ntile_norm_eur_egfr_count_low %>%
  filter(COUNT_PGS >= 70)
nrow(df_ntile_norm_eur_egfr_70_low)
sum(df_ntile_norm_eur_egfr_70_low$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_70_low$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 50 in the EUR eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
df_ntile_norm_eur_egfr_50_low <- df_ntile_norm_eur_egfr_count_low %>%
  filter(COUNT_PGS >= 50)
nrow(df_ntile_norm_eur_egfr_50_low)
sum(df_ntile_norm_eur_egfr_50_low$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_50_low$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 50 in df_ntile_norm_eur_phe_count (the ntile >= 98
# counts, even though this cell sits under the "bottom 2%" heading); print
# the total, then the members of the `eur` and `afr` population tables.
df_ntile_norm_eur_phe_50 <- df_ntile_norm_eur_phe_count %>%
  filter(COUNT_PGS >= 50)
nrow(df_ntile_norm_eur_phe_50)
sum(df_ntile_norm_eur_phe_50$IID %in% eur$IID)
sum(df_ntile_norm_eur_phe_50$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 70 in df_ntile_norm_eas_egfr_count (the ntile >= 98
# counts, even though this cell sits under the "bottom 2%" heading); print
# the total, then the members of the `eur` and `afr` population tables.
df_ntile_norm_eas_egfr_70 <- df_ntile_norm_eas_egfr_count %>%
  filter(COUNT_PGS >= 70)
nrow(df_ntile_norm_eas_egfr_70)
sum(df_ntile_norm_eas_egfr_70$IID %in% eur$IID)
sum(df_ntile_norm_eas_egfr_70$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 50 in df_ntile_norm_eas_egfr_count (the ntile >= 98
# counts, even though this cell sits under the "bottom 2%" heading); print
# the total, then the members of the `eur` and `afr` population tables.
df_ntile_norm_eas_egfr_50 <- df_ntile_norm_eas_egfr_count %>%
  filter(COUNT_PGS >= 50)
nrow(df_ntile_norm_eas_egfr_50)
sum(df_ntile_norm_eas_egfr_50$IID %in% eur$IID)
sum(df_ntile_norm_eas_egfr_50$IID %in% afr$IID)

### meta egfr

#### top 2%

In [None]:
# IIDs with COUNT_PGS >= 95 * 4 in the META eGFR top-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
# NOTE(review): the * 4 presumably reflects four META runs - confirm.
df_ntile_norm_meta_egfr_95 <- df_ntile_norm_meta_egfr_count %>%
  filter(COUNT_PGS >= (95 * 4))
nrow(df_ntile_norm_meta_egfr_95)
sum(df_ntile_norm_meta_egfr_95$IID %in% eur$IID)
sum(df_ntile_norm_meta_egfr_95$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 90 * 4 in the META eGFR top-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
# NOTE(review): the * 4 presumably reflects four META runs - confirm.
df_ntile_norm_meta_egfr_90 <- df_ntile_norm_meta_egfr_count %>%
  filter(COUNT_PGS >= (90 * 4))
nrow(df_ntile_norm_meta_egfr_90)
sum(df_ntile_norm_meta_egfr_90$IID %in% eur$IID)
sum(df_ntile_norm_meta_egfr_90$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 80 * 4 in the META eGFR top-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
# NOTE(review): the * 4 presumably reflects four META runs - confirm.
df_ntile_norm_meta_egfr_80 <- df_ntile_norm_meta_egfr_count %>%
  filter(COUNT_PGS >= (80 * 4))
nrow(df_ntile_norm_meta_egfr_80)
sum(df_ntile_norm_meta_egfr_80$IID %in% eur$IID)
sum(df_ntile_norm_meta_egfr_80$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 70 * 4 in the META eGFR top-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
# NOTE(review): the * 4 presumably reflects four META runs - confirm.
df_ntile_norm_meta_egfr_70 <- df_ntile_norm_meta_egfr_count %>%
  filter(COUNT_PGS >= (70 * 4))
nrow(df_ntile_norm_meta_egfr_70)
sum(df_ntile_norm_meta_egfr_70$IID %in% eur$IID)
sum(df_ntile_norm_meta_egfr_70$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 50 * 4 in the META eGFR top-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
# NOTE(review): the * 4 presumably reflects four META runs - confirm.
df_ntile_norm_meta_egfr_50 <- df_ntile_norm_meta_egfr_count %>%
  filter(COUNT_PGS >= (50 * 4))
nrow(df_ntile_norm_meta_egfr_50)
sum(df_ntile_norm_meta_egfr_50$IID %in% eur$IID)
sum(df_ntile_norm_meta_egfr_50$IID %in% afr$IID)

#### bottom 2%

In [None]:
# IIDs with COUNT_PGS >= 95 * 4 in the META eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
# NOTE(review): the * 4 presumably reflects four META runs - confirm.
df_ntile_norm_meta_egfr_95_low <- df_ntile_norm_meta_egfr_count_low %>%
  filter(COUNT_PGS >= (95 * 4))
nrow(df_ntile_norm_meta_egfr_95_low)
sum(df_ntile_norm_meta_egfr_95_low$IID %in% eur$IID)
sum(df_ntile_norm_meta_egfr_95_low$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 90 * 4 in the META eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
# NOTE(review): the * 4 presumably reflects four META runs - confirm.
df_ntile_norm_meta_egfr_90_low <- df_ntile_norm_meta_egfr_count_low %>%
  filter(COUNT_PGS >= (90 * 4))
nrow(df_ntile_norm_meta_egfr_90_low)
sum(df_ntile_norm_meta_egfr_90_low$IID %in% eur$IID)
sum(df_ntile_norm_meta_egfr_90_low$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 80 * 4 in the META eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
# NOTE(review): the * 4 presumably reflects four META runs - confirm.
df_ntile_norm_meta_egfr_80_low <- df_ntile_norm_meta_egfr_count_low %>%
  filter(COUNT_PGS >= (80 * 4))
nrow(df_ntile_norm_meta_egfr_80_low)
sum(df_ntile_norm_meta_egfr_80_low$IID %in% eur$IID)
sum(df_ntile_norm_meta_egfr_80_low$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 70 * 4 in the META eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
# NOTE(review): the * 4 presumably reflects four META runs - confirm.
df_ntile_norm_meta_egfr_70_low <- df_ntile_norm_meta_egfr_count_low %>%
  filter(COUNT_PGS >= (70 * 4))
nrow(df_ntile_norm_meta_egfr_70_low)
sum(df_ntile_norm_meta_egfr_70_low$IID %in% eur$IID)
sum(df_ntile_norm_meta_egfr_70_low$IID %in% afr$IID)

In [None]:
# IIDs with COUNT_PGS >= 50 * 4 in the META eGFR bottom-2% counts; print the
# total, then how many of them are in the `eur` and `afr` population tables.
# NOTE(review): the * 4 presumably reflects four META runs - confirm.
df_ntile_norm_meta_egfr_50_low <- df_ntile_norm_meta_egfr_count_low %>%
  filter(COUNT_PGS >= (50 * 4))
nrow(df_ntile_norm_meta_egfr_50_low)
sum(df_ntile_norm_meta_egfr_50_low$IID %in% eur$IID)
sum(df_ntile_norm_meta_egfr_50_low$IID %in% afr$IID)

# calculate stability

## read in input files (if needed)

### all

In [None]:
# Load the ALL-cohort performance-metrics table (tab-separated, with header).
# read.delim() is read.csv() with sep = "\t" as its default.
model_metrics_df <- read.delim(
  'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)

### all individuals multiancestry scores

In [None]:
# Load the ALL-cohort, multiancestry-score performance-metrics table
# (tab-separated, with header). read.delim() is read.csv() with sep = "\t".
model_metrics_df_multiancestry_score <- read.delim(
  'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt'
)

### eur

In [None]:
# Load the EUR performance-metrics table (tab-separated, with header).
# read.delim() is read.csv() with sep = "\t" as its default.
model_metrics_df_eur <- read.delim(
  'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)

### eur individuals multiancestry scores

In [None]:
# Load the EUR, multiancestry-score performance-metrics table
# (tab-separated, with header). read.delim() is read.csv() with sep = "\t".
model_metrics_df_eur_multiancestry_score <- read.delim(
  'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt'
)

### afr

In [None]:
# Load the AFR performance-metrics table (tab-separated, with header).
# read.delim() is read.csv() with sep = "\t" as its default.
model_metrics_df_afr <- read.delim(
  'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)

### afr individuals multiancestry scores

In [None]:
# Load the AFR, multiancestry-score performance-metrics table
# (tab-separated, with header). read.delim() is read.csv() with sep = "\t".
model_metrics_df_afr_multiancestry_score <- read.delim(
  'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt'
)

## create function (if needed)

In [None]:
#' Models whose criterion flag equals 1 in every row
#'
#' A model is kept only when every row belonging to it has `criteria == 1`;
#' a model with any other value, or any missing value, in that column is
#' dropped. Implemented with base R only.
#'
#' @param df Data frame with a `model` column and the column named by
#'   `criteria`.
#' @param criteria Single string naming the flag column (e.g. "ROPE_02").
#' @param ntile If TRUE, each returned name is prefixed with "ntile_".
#' @return The qualifying model identifiers, unique and in order of first
#'   appearance in `df` (they are not sorted), optionally prefixed.
equiv_scores <- function(df, criteria, ntile = FALSE) {
    # criteria must be one column name; a longer vector would make the
    # `if` below fail with an unrelated "condition has length > 1" error.
    if (!is.character(criteria) || length(criteria) != 1L || is.na(criteria)) {
        stop("`criteria` must be a single column name")
    }
    if (!criteria %in% names(df)) {
        stop(paste0("Criteria column ", criteria, " not found in dataframe"))
    }
    if (!"model" %in% names(df)) {
        stop("Column model not found in dataframe")
    }

    model_col <- df[["model"]]
    passes <- df[[criteria]] == 1

    # unique() keeps first-appearance order; match() maps every row to its
    # model's position (NA models map to the NA entry, so no row is lost).
    models <- unique(model_col)
    group_id <- match(model_col, models)

    # isTRUE() turns an NA from all() into FALSE, so a group holding a
    # missing flag is excluded rather than propagating NA.
    keep <- vapply(
        seq_along(models),
        function(i) isTRUE(all(passes[group_id == i])),
        logical(1)
    )
    selected <- models[keep]

    if (ntile) {
        paste0("ntile_", selected)
    } else {
        selected
    }
}

## simulate null data

In [None]:
# Null data set: keep the IID/PGS pairs of df_ntile_norm and replace `ntile`
# with draws from a normal(mean 50, sd 15) truncated to [0, 100].
# NOTE(review): no seed is set in this cell, so the draws differ between runs
# unless a seed was set earlier - confirm.
# NOTE(review): under this distribution a value >= 98 has probability well
# below 0.1% (98 is 3.2 sd above the mean), and the draws are continuous;
# if the real `ntile` values are percentile ranks, their top-2% rate is about
# 2% - confirm this is the intended null.
sim_null <- df_ntile_norm %>%
  select(IID, PGS) %>%
  mutate(
    ntile = rtruncnorm(n(), a = 0, b = 100, mean = 50, sd = 15)
  )
nrow(df_ntile_norm)
nrow(sim_null)
head(sim_null)

In [None]:
# Distribution of the observed ntile values, for comparison with sim_null.
summary(df_ntile_norm[["ntile"]])

In [None]:
# Distribution of the simulated ntile values.
summary(sim_null[["ntile"]])

## calculate number of >=98th percentile classifications per each score and individual

### all

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of model_metrics_df.
ntile_list <- equiv_scores(model_metrics_df, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per IID: number of distinct selected scores with ntile >= 98, and that
# number as a percentage of n_scores. IIDs with no such score are removed by
# the filter, so the summaries cover only IIDs with at least one hit.
n_class <- df_ntile_norm %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class)
summary(n_class$COUNT_PGS)
summary(n_class$STABILITY)
n_distinct(n_class$IID)

### all individuals multiancestry scores

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of
# model_metrics_df_multiancestry_score.
ntile_list <- equiv_scores(model_metrics_df_multiancestry_score, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per IID: number of distinct selected scores with ntile >= 98, and that
# number as a percentage of n_scores. IIDs with no such score are removed by
# the filter, so the summaries cover only IIDs with at least one hit.
n_class_multiancestry_score <- df_ntile_norm_multiancestry_score %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_multiancestry_score)
summary(n_class_multiancestry_score$COUNT_PGS)
summary(n_class_multiancestry_score$STABILITY)
n_distinct(n_class_multiancestry_score$IID)

### eur

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of model_metrics_df.
# NOTE(review): the score list comes from the ALL-cohort metrics
# (model_metrics_df), not model_metrics_df_eur, although the percentiles
# below are df_ntile_norm_eur - confirm this is intentional.
ntile_list <- equiv_scores(model_metrics_df, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per IID: number of distinct selected scores with ntile >= 98, and that
# number as a percentage of n_scores. IIDs with no such score are removed by
# the filter, so the summaries cover only IIDs with at least one hit.
n_class_eur <- df_ntile_norm_eur %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_eur)
summary(n_class_eur$COUNT_PGS)
summary(n_class_eur$STABILITY)
n_distinct(n_class_eur$IID)

### eur individuals multiancestry scores

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of
# model_metrics_df_multiancestry_score.
# NOTE(review): the score list comes from the ALL-cohort table, not
# model_metrics_df_eur_multiancestry_score, although the percentiles below
# are df_ntile_norm_eur_multiancestry_score - confirm this is intentional.
ntile_list <- equiv_scores(model_metrics_df_multiancestry_score, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per IID: number of distinct selected scores with ntile >= 98, and that
# number as a percentage of n_scores. IIDs with no such score are removed by
# the filter, so the summaries cover only IIDs with at least one hit.
n_class_eur_multiancestry_score <- df_ntile_norm_eur_multiancestry_score %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_eur_multiancestry_score)
summary(n_class_eur_multiancestry_score$COUNT_PGS)
summary(n_class_eur_multiancestry_score$STABILITY)
n_distinct(n_class_eur_multiancestry_score$IID)

### afr

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of model_metrics_df.
# NOTE(review): the score list comes from the ALL-cohort metrics
# (model_metrics_df), not model_metrics_df_afr, although the percentiles
# below are df_ntile_norm_afr - confirm this is intentional.
ntile_list <- equiv_scores(model_metrics_df, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per IID: number of distinct selected scores with ntile >= 98, and that
# number as a percentage of n_scores. IIDs with no such score are removed by
# the filter, so the summaries cover only IIDs with at least one hit.
n_class_afr <- df_ntile_norm_afr %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_afr)
summary(n_class_afr$COUNT_PGS)
summary(n_class_afr$STABILITY)
n_distinct(n_class_afr$IID)

### afr individuals multiancestry scores

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of
# model_metrics_df_multiancestry_score.
# NOTE(review): the score list comes from the ALL-cohort table, not
# model_metrics_df_afr_multiancestry_score, although the percentiles below
# are df_ntile_norm_afr_multiancestry_score - confirm this is intentional.
ntile_list <- equiv_scores(model_metrics_df_multiancestry_score, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per IID: number of distinct selected scores with ntile >= 98, and that
# number as a percentage of n_scores. IIDs with no such score are removed by
# the filter, so the summaries cover only IIDs with at least one hit.
n_class_afr_multiancestry_score <- df_ntile_norm_afr_multiancestry_score %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_afr_multiancestry_score)
summary(n_class_afr_multiancestry_score$COUNT_PGS)
summary(n_class_afr_multiancestry_score$STABILITY)
n_distinct(n_class_afr_multiancestry_score$IID)

### simulated data- all scores

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of model_metrics_df.
ntile_list <- equiv_scores(model_metrics_df, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Same tally on the simulated data: per IID, number of distinct selected
# scores with simulated ntile >= 98 and that number as a percentage of
# n_scores. IIDs with no such score are removed by the filter.
n_class_sim_null <- sim_null %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_sim_null)
summary(n_class_sim_null$COUNT_PGS)
summary(n_class_sim_null$STABILITY)
n_distinct(n_class_sim_null$IID)

### simulated data- eur/multiancestry scores only

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of
# model_metrics_df_multiancestry_score.
ntile_list <- equiv_scores(model_metrics_df_multiancestry_score, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Same tally on the simulated data: per IID, number of distinct selected
# scores with simulated ntile >= 98 and that number as a percentage of
# n_scores. IIDs with no such score are removed by the filter.
n_class_sim_null_multiancestry_score <- sim_null %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_sim_null_multiancestry_score)
summary(n_class_sim_null_multiancestry_score$COUNT_PGS)
summary(n_class_sim_null_multiancestry_score$STABILITY)
n_distinct(n_class_sim_null_multiancestry_score$IID)

## calculate number of <=2nd percentile classifications per each score and individual

### all

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of model_metrics_df.
ntile_list <- equiv_scores(model_metrics_df, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per IID: number of distinct selected scores with ntile <= 2, and that
# number as a percentage of n_scores. IIDs with no such score are removed by
# the filter, so the summaries cover only IIDs with at least one hit.
n_class_low <- df_ntile_norm %>%
  filter(PGS %in% ntile_list, ntile <= 2) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_low)
summary(n_class_low$COUNT_PGS)
summary(n_class_low$STABILITY)
n_distinct(n_class_low$IID)

### all individuals multiancestry scores

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of
# model_metrics_df_multiancestry_score.
ntile_list <- equiv_scores(model_metrics_df_multiancestry_score, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per IID: number of distinct selected scores with ntile <= 2, and that
# number as a percentage of n_scores. IIDs with no such score are removed by
# the filter, so the summaries cover only IIDs with at least one hit.
n_class_low_multiancestry_score <- df_ntile_norm_multiancestry_score %>%
  filter(PGS %in% ntile_list, ntile <= 2) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_low_multiancestry_score)
summary(n_class_low_multiancestry_score$COUNT_PGS)
summary(n_class_low_multiancestry_score$STABILITY)
n_distinct(n_class_low_multiancestry_score$IID)

### eur

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of model_metrics_df.
# NOTE(review): the score list comes from the ALL-cohort metrics
# (model_metrics_df), not model_metrics_df_eur, although the percentiles
# below are df_ntile_norm_eur - confirm this is intentional.
ntile_list <- equiv_scores(model_metrics_df, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per IID: number of distinct selected scores with ntile <= 2, and that
# number as a percentage of n_scores. IIDs with no such score are removed by
# the filter, so the summaries cover only IIDs with at least one hit.
n_class_low_eur <- df_ntile_norm_eur %>%
  filter(PGS %in% ntile_list, ntile <= 2) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_low_eur)
summary(n_class_low_eur$COUNT_PGS)
summary(n_class_low_eur$STABILITY)
n_distinct(n_class_low_eur$IID)

### eur individuals multiancestry scores

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of
# model_metrics_df_multiancestry_score.
# NOTE(review): the score list comes from the ALL-cohort table, not
# model_metrics_df_eur_multiancestry_score, although the percentiles below
# are df_ntile_norm_eur_multiancestry_score - confirm this is intentional.
ntile_list <- equiv_scores(model_metrics_df_multiancestry_score, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per IID: number of distinct selected scores with ntile <= 2, and that
# number as a percentage of n_scores. IIDs with no such score are removed by
# the filter, so the summaries cover only IIDs with at least one hit.
n_class_low_eur_multiancestry_score <- df_ntile_norm_eur_multiancestry_score %>%
  filter(PGS %in% ntile_list, ntile <= 2) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_low_eur_multiancestry_score)
summary(n_class_low_eur_multiancestry_score$COUNT_PGS)
summary(n_class_low_eur_multiancestry_score$STABILITY)
n_distinct(n_class_low_eur_multiancestry_score$IID)

### afr

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of model_metrics_df.
# NOTE(review): the score list comes from the ALL-cohort metrics
# (model_metrics_df), not model_metrics_df_afr, although the percentiles
# below are df_ntile_norm_afr - confirm this is intentional.
ntile_list <- equiv_scores(model_metrics_df, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per IID: number of distinct selected scores with ntile <= 2, and that
# number as a percentage of n_scores. IIDs with no such score are removed by
# the filter, so the summaries cover only IIDs with at least one hit.
n_class_low_afr <- df_ntile_norm_afr %>%
  filter(PGS %in% ntile_list, ntile <= 2) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_low_afr)
summary(n_class_low_afr$COUNT_PGS)
summary(n_class_low_afr$STABILITY)
n_distinct(n_class_low_afr$IID)

### afr individuals multiancestry scores

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of
# model_metrics_df_multiancestry_score.
# NOTE(review): the score list comes from the ALL-cohort table, not
# model_metrics_df_afr_multiancestry_score, although the percentiles below
# are df_ntile_norm_afr_multiancestry_score - confirm this is intentional.
ntile_list <- equiv_scores(model_metrics_df_multiancestry_score, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per IID: number of distinct selected scores with ntile <= 2, and that
# number as a percentage of n_scores. IIDs with no such score are removed by
# the filter, so the summaries cover only IIDs with at least one hit.
n_class_low_afr_multiancestry_score <- df_ntile_norm_afr_multiancestry_score %>%
  filter(PGS %in% ntile_list, ntile <= 2) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_low_afr_multiancestry_score)
summary(n_class_low_afr_multiancestry_score$COUNT_PGS)
summary(n_class_low_afr_multiancestry_score$STABILITY)
n_distinct(n_class_low_afr_multiancestry_score$IID)

### simulated data- all scores

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of model_metrics_df.
ntile_list <- equiv_scores(model_metrics_df, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Same tally on the simulated data: per IID, number of distinct selected
# scores with simulated ntile <= 2 and that number as a percentage of
# n_scores. IIDs with no such score are removed by the filter.
n_class_low_sim_null <- sim_null %>%
  filter(PGS %in% ntile_list, ntile <= 2) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_low_sim_null)
summary(n_class_low_sim_null$COUNT_PGS)
summary(n_class_low_sim_null$STABILITY)
n_distinct(n_class_low_sim_null$IID)

### simulated data- eur/multiancestry scores

In [None]:
# Scores whose ROPE_02 flag is 1 in every row of
# model_metrics_df_multiancestry_score.
ntile_list <- equiv_scores(model_metrics_df_multiancestry_score, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Same tally on the simulated data: per IID, number of distinct selected
# scores with simulated ntile <= 2 and that number as a percentage of
# n_scores. IIDs with no such score are removed by the filter.
n_class_low_sim_null_multiancestry_score <- sim_null %>%
  filter(PGS %in% ntile_list, ntile <= 2) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = COUNT_PGS / n_scores * 100)

head(n_class_low_sim_null_multiancestry_score)
summary(n_class_low_sim_null_multiancestry_score$COUNT_PGS)
summary(n_class_low_sim_null_multiancestry_score$STABILITY)
n_distinct(n_class_low_sim_null_multiancestry_score$IID)

## filter to high confidence individuals >= 98th percentile

### all

#### eur eGFR

In [None]:
# Rows of n_class for the IIDs in df_ntile_norm_eur_egfr_95, then summaries
# of COUNT_PGS and STABILITY and the number of distinct IIDs retained.
n_class_eur_egfr_95 <- n_class %>%
  filter(IID %in% df_ntile_norm_eur_egfr_95$IID)

summary(n_class_eur_egfr_95$COUNT_PGS)
summary(n_class_eur_egfr_95$STABILITY)
n_distinct(n_class_eur_egfr_95$IID)

In [None]:
# Rows of n_class for the IIDs in df_ntile_norm_eur_egfr_90, then summaries
# of COUNT_PGS and STABILITY and the number of distinct IIDs retained.
n_class_eur_egfr_90 <- n_class %>%
  filter(IID %in% df_ntile_norm_eur_egfr_90$IID)

summary(n_class_eur_egfr_90$COUNT_PGS)
summary(n_class_eur_egfr_90$STABILITY)
n_distinct(n_class_eur_egfr_90$IID)

In [None]:
# Rows of n_class for the IIDs in df_ntile_norm_eur_egfr_80, then summaries
# of COUNT_PGS and STABILITY and the number of distinct IIDs retained.
n_class_eur_egfr_80 <- n_class %>%
  filter(IID %in% df_ntile_norm_eur_egfr_80$IID)

summary(n_class_eur_egfr_80$COUNT_PGS)
summary(n_class_eur_egfr_80$STABILITY)
n_distinct(n_class_eur_egfr_80$IID)

In [None]:
# Rows of n_class for the IIDs in df_ntile_norm_eur_egfr_70, then summaries
# of COUNT_PGS and STABILITY and the number of distinct IIDs retained.
n_class_eur_egfr_70 <- n_class %>%
  filter(IID %in% df_ntile_norm_eur_egfr_70$IID)

summary(n_class_eur_egfr_70$COUNT_PGS)
summary(n_class_eur_egfr_70$STABILITY)
n_distinct(n_class_eur_egfr_70$IID)

In [None]:
# Rows of n_class for the IIDs in df_ntile_norm_eur_egfr_50, then summaries
# of COUNT_PGS and STABILITY and the number of distinct IIDs retained.
n_class_eur_egfr_50 <- n_class %>%
  filter(IID %in% df_ntile_norm_eur_egfr_50$IID)

summary(n_class_eur_egfr_50$COUNT_PGS)
summary(n_class_eur_egfr_50$STABILITY)
n_distinct(n_class_eur_egfr_50$IID)

#### afr egfr

In [None]:
# Rows of n_class for the IIDs in df_ntile_norm_afr_egfr_80, then summaries
# of COUNT_PGS and STABILITY and the number of distinct IIDs retained.
n_class_afr_egfr_80 <- n_class %>%
  filter(IID %in% df_ntile_norm_afr_egfr_80$IID)

summary(n_class_afr_egfr_80$COUNT_PGS)
summary(n_class_afr_egfr_80$STABILITY)
n_distinct(n_class_afr_egfr_80$IID)

In [None]:
# Rows of n_class for the IIDs in df_ntile_norm_afr_egfr_70, then summaries
# of COUNT_PGS and STABILITY and the number of distinct IIDs retained.
n_class_afr_egfr_70 <- n_class %>%
  filter(IID %in% df_ntile_norm_afr_egfr_70$IID)

summary(n_class_afr_egfr_70$COUNT_PGS)
summary(n_class_afr_egfr_70$STABILITY)
n_distinct(n_class_afr_egfr_70$IID)

In [None]:
# Rows of n_class for the IIDs in df_ntile_norm_afr_egfr_50, then summaries
# of COUNT_PGS and STABILITY and the number of distinct IIDs retained.
n_class_afr_egfr_50 <- n_class %>%
  filter(IID %in% df_ntile_norm_afr_egfr_50$IID)

summary(n_class_afr_egfr_50$COUNT_PGS)
summary(n_class_afr_egfr_50$STABILITY)
n_distinct(n_class_afr_egfr_50$IID)

#### meta egfr

In [None]:
# Rows of n_class for the IIDs in df_ntile_norm_meta_egfr_95, then summaries
# of COUNT_PGS and STABILITY and the number of distinct IIDs retained.
n_class_meta_egfr_95 <- n_class %>%
  filter(IID %in% df_ntile_norm_meta_egfr_95$IID)

summary(n_class_meta_egfr_95$COUNT_PGS)
summary(n_class_meta_egfr_95$STABILITY)
n_distinct(n_class_meta_egfr_95$IID)

In [None]:
# Rows of n_class for the IIDs in df_ntile_norm_meta_egfr_90, then summaries
# of COUNT_PGS and STABILITY and the number of distinct IIDs retained.
n_class_meta_egfr_90 <- n_class %>%
  filter(IID %in% df_ntile_norm_meta_egfr_90$IID)

summary(n_class_meta_egfr_90$COUNT_PGS)
summary(n_class_meta_egfr_90$STABILITY)
n_distinct(n_class_meta_egfr_90$IID)

In [None]:
# Rows of n_class for the IIDs in df_ntile_norm_meta_egfr_80, then summaries
# of COUNT_PGS and STABILITY and the number of distinct IIDs retained.
n_class_meta_egfr_80 <- n_class %>%
  filter(IID %in% df_ntile_norm_meta_egfr_80$IID)

summary(n_class_meta_egfr_80$COUNT_PGS)
summary(n_class_meta_egfr_80$STABILITY)
n_distinct(n_class_meta_egfr_80$IID)

In [None]:
# Rows of n_class for the IIDs in df_ntile_norm_meta_egfr_70, then summaries
# of COUNT_PGS and STABILITY and the number of distinct IIDs retained.
n_class_meta_egfr_70 <- n_class %>%
  filter(IID %in% df_ntile_norm_meta_egfr_70$IID)

summary(n_class_meta_egfr_70$COUNT_PGS)
summary(n_class_meta_egfr_70$STABILITY)
n_distinct(n_class_meta_egfr_70$IID)

In [None]:
# Rows of n_class for the IIDs in df_ntile_norm_meta_egfr_50, then summaries
# of COUNT_PGS and STABILITY and the number of distinct IIDs retained.
n_class_meta_egfr_50 <- n_class %>%
  filter(IID %in% df_ntile_norm_meta_egfr_50$IID)

summary(n_class_meta_egfr_50$COUNT_PGS)
summary(n_class_meta_egfr_50$STABILITY)
n_distinct(n_class_meta_egfr_50$IID)

### all individuals multiancestry scores

#### eur egfr

In [None]:
# Rows of n_class_multiancestry_score for the IIDs in
# df_ntile_norm_eur_egfr_95, then summaries of COUNT_PGS and STABILITY and
# the number of distinct IIDs retained.
n_class_multiancestry_score_eur_egfr_95 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_95$IID)

summary(n_class_multiancestry_score_eur_egfr_95$COUNT_PGS)
summary(n_class_multiancestry_score_eur_egfr_95$STABILITY)
n_distinct(n_class_multiancestry_score_eur_egfr_95$IID)

In [None]:
# Rows of n_class_multiancestry_score for the IIDs in
# df_ntile_norm_eur_egfr_90, then summaries of COUNT_PGS and STABILITY and
# the number of distinct IIDs retained.
n_class_multiancestry_score_eur_egfr_90 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_90$IID)

summary(n_class_multiancestry_score_eur_egfr_90$COUNT_PGS)
summary(n_class_multiancestry_score_eur_egfr_90$STABILITY)
n_distinct(n_class_multiancestry_score_eur_egfr_90$IID)

In [None]:
# Rows of n_class_multiancestry_score for the IIDs in
# df_ntile_norm_eur_egfr_80, then summaries of COUNT_PGS and STABILITY and
# the number of distinct IIDs retained.
n_class_multiancestry_score_eur_egfr_80 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_80$IID)

summary(n_class_multiancestry_score_eur_egfr_80$COUNT_PGS)
summary(n_class_multiancestry_score_eur_egfr_80$STABILITY)
n_distinct(n_class_multiancestry_score_eur_egfr_80$IID)

In [None]:
# Rows of n_class_multiancestry_score for the IIDs in
# df_ntile_norm_eur_egfr_70, then summaries of COUNT_PGS and STABILITY and
# the number of distinct IIDs retained.
n_class_multiancestry_score_eur_egfr_70 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_70$IID)

summary(n_class_multiancestry_score_eur_egfr_70$COUNT_PGS)
summary(n_class_multiancestry_score_eur_egfr_70$STABILITY)
n_distinct(n_class_multiancestry_score_eur_egfr_70$IID)

In [None]:
# Rows of n_class_multiancestry_score for the IIDs in
# df_ntile_norm_eur_egfr_50, then summaries of COUNT_PGS and STABILITY and
# the number of distinct IIDs retained.
n_class_multiancestry_score_eur_egfr_50 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_50$IID)

summary(n_class_multiancestry_score_eur_egfr_50$COUNT_PGS)
summary(n_class_multiancestry_score_eur_egfr_50$STABILITY)
n_distinct(n_class_multiancestry_score_eur_egfr_50$IID)

#### afr egfr

In [None]:
# Rows of n_class_multiancestry_score for the IIDs in
# df_ntile_norm_afr_egfr_80, then summaries of COUNT_PGS and STABILITY and
# the number of distinct IIDs retained.
n_class_multiancestry_score_afr_egfr_80 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_afr_egfr_80$IID)

summary(n_class_multiancestry_score_afr_egfr_80$COUNT_PGS)
summary(n_class_multiancestry_score_afr_egfr_80$STABILITY)
n_distinct(n_class_multiancestry_score_afr_egfr_80$IID)

In [None]:
# Rows of n_class_multiancestry_score for the IIDs in
# df_ntile_norm_afr_egfr_70, then summaries of COUNT_PGS and STABILITY and
# the number of distinct IIDs retained.
n_class_multiancestry_score_afr_egfr_70 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_afr_egfr_70$IID)

summary(n_class_multiancestry_score_afr_egfr_70$COUNT_PGS)
summary(n_class_multiancestry_score_afr_egfr_70$STABILITY)
n_distinct(n_class_multiancestry_score_afr_egfr_70$IID)

In [None]:
# Rows of n_class_multiancestry_score for the IIDs in
# df_ntile_norm_afr_egfr_50, then summaries of COUNT_PGS and STABILITY and
# the number of distinct IIDs retained.
n_class_multiancestry_score_afr_egfr_50 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_afr_egfr_50$IID)

summary(n_class_multiancestry_score_afr_egfr_50$COUNT_PGS)
summary(n_class_multiancestry_score_afr_egfr_50$STABILITY)
n_distinct(n_class_multiancestry_score_afr_egfr_50$IID)

#### meta egfr

In [None]:
# Rows of n_class_multiancestry_score for the IIDs in
# df_ntile_norm_meta_egfr_95, then summaries of COUNT_PGS and STABILITY and
# the number of distinct IIDs retained.
n_class_multiancestry_score_meta_egfr_95 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_95$IID)

summary(n_class_multiancestry_score_meta_egfr_95$COUNT_PGS)
summary(n_class_multiancestry_score_meta_egfr_95$STABILITY)
n_distinct(n_class_multiancestry_score_meta_egfr_95$IID)

In [None]:
# Rows of n_class_multiancestry_score for the IIDs in
# df_ntile_norm_meta_egfr_90, then summaries of COUNT_PGS and STABILITY and
# the number of distinct IIDs retained.
n_class_multiancestry_score_meta_egfr_90 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_90$IID)

summary(n_class_multiancestry_score_meta_egfr_90$COUNT_PGS)
summary(n_class_multiancestry_score_meta_egfr_90$STABILITY)
n_distinct(n_class_multiancestry_score_meta_egfr_90$IID)

In [None]:
# Rows of n_class_multiancestry_score for the IIDs in
# df_ntile_norm_meta_egfr_80, then summaries of COUNT_PGS and STABILITY and
# the number of distinct IIDs retained.
n_class_multiancestry_score_meta_egfr_80 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_80$IID)

summary(n_class_multiancestry_score_meta_egfr_80$COUNT_PGS)
summary(n_class_multiancestry_score_meta_egfr_80$STABILITY)
n_distinct(n_class_multiancestry_score_meta_egfr_80$IID)

In [None]:
# Keep the rows of n_class_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_70, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_multiancestry_score_meta_egfr_70 <- n_class_multiancestry_score[
  n_class_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_70[["IID"]],
]

summary(n_class_multiancestry_score_meta_egfr_70[["COUNT_PGS"]])
summary(n_class_multiancestry_score_meta_egfr_70[["STABILITY"]])
length(unique(n_class_multiancestry_score_meta_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_50, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_multiancestry_score_meta_egfr_50 <- n_class_multiancestry_score[
  n_class_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_50[["IID"]],
]

summary(n_class_multiancestry_score_meta_egfr_50[["COUNT_PGS"]])
summary(n_class_multiancestry_score_meta_egfr_50[["STABILITY"]])
length(unique(n_class_multiancestry_score_meta_egfr_50[["IID"]]))

### eur

#### eur egfr

In [None]:
# Keep the rows of n_class_eur whose IID occurs in df_ntile_norm_eur_egfr_95,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_eur_indiv_eur_egfr_95 <- n_class_eur[
  n_class_eur[["IID"]] %in% df_ntile_norm_eur_egfr_95[["IID"]],
]

summary(n_class_eur_indiv_eur_egfr_95[["COUNT_PGS"]])
summary(n_class_eur_indiv_eur_egfr_95[["STABILITY"]])
length(unique(n_class_eur_indiv_eur_egfr_95[["IID"]]))

In [None]:
# Keep the rows of n_class_eur whose IID occurs in df_ntile_norm_eur_egfr_90,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_eur_indiv_eur_egfr_90 <- n_class_eur[
  n_class_eur[["IID"]] %in% df_ntile_norm_eur_egfr_90[["IID"]],
]

summary(n_class_eur_indiv_eur_egfr_90[["COUNT_PGS"]])
summary(n_class_eur_indiv_eur_egfr_90[["STABILITY"]])
length(unique(n_class_eur_indiv_eur_egfr_90[["IID"]]))

In [None]:
# Keep the rows of n_class_eur whose IID occurs in df_ntile_norm_eur_egfr_80,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_eur_indiv_eur_egfr_80 <- n_class_eur[
  n_class_eur[["IID"]] %in% df_ntile_norm_eur_egfr_80[["IID"]],
]

summary(n_class_eur_indiv_eur_egfr_80[["COUNT_PGS"]])
summary(n_class_eur_indiv_eur_egfr_80[["STABILITY"]])
length(unique(n_class_eur_indiv_eur_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_eur whose IID occurs in df_ntile_norm_eur_egfr_70,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_eur_indiv_eur_egfr_70 <- n_class_eur[
  n_class_eur[["IID"]] %in% df_ntile_norm_eur_egfr_70[["IID"]],
]

summary(n_class_eur_indiv_eur_egfr_70[["COUNT_PGS"]])
summary(n_class_eur_indiv_eur_egfr_70[["STABILITY"]])
length(unique(n_class_eur_indiv_eur_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_eur whose IID occurs in df_ntile_norm_eur_egfr_50,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_eur_indiv_eur_egfr_50 <- n_class_eur[
  n_class_eur[["IID"]] %in% df_ntile_norm_eur_egfr_50[["IID"]],
]

summary(n_class_eur_indiv_eur_egfr_50[["COUNT_PGS"]])
summary(n_class_eur_indiv_eur_egfr_50[["STABILITY"]])
length(unique(n_class_eur_indiv_eur_egfr_50[["IID"]]))

#### meta egfr

In [None]:
# Keep the rows of n_class_eur whose IID occurs in df_ntile_norm_meta_egfr_95,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_eur_indiv_meta_egfr_95 <- n_class_eur[
  n_class_eur[["IID"]] %in% df_ntile_norm_meta_egfr_95[["IID"]],
]

summary(n_class_eur_indiv_meta_egfr_95[["COUNT_PGS"]])
summary(n_class_eur_indiv_meta_egfr_95[["STABILITY"]])
length(unique(n_class_eur_indiv_meta_egfr_95[["IID"]]))

In [None]:
# Keep the rows of n_class_eur whose IID occurs in df_ntile_norm_meta_egfr_90,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_eur_indiv_meta_egfr_90 <- n_class_eur[
  n_class_eur[["IID"]] %in% df_ntile_norm_meta_egfr_90[["IID"]],
]

summary(n_class_eur_indiv_meta_egfr_90[["COUNT_PGS"]])
summary(n_class_eur_indiv_meta_egfr_90[["STABILITY"]])
length(unique(n_class_eur_indiv_meta_egfr_90[["IID"]]))

In [None]:
# Keep the rows of n_class_eur whose IID occurs in df_ntile_norm_meta_egfr_80,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_eur_indiv_meta_egfr_80 <- n_class_eur[
  n_class_eur[["IID"]] %in% df_ntile_norm_meta_egfr_80[["IID"]],
]

summary(n_class_eur_indiv_meta_egfr_80[["COUNT_PGS"]])
summary(n_class_eur_indiv_meta_egfr_80[["STABILITY"]])
length(unique(n_class_eur_indiv_meta_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_eur whose IID occurs in df_ntile_norm_meta_egfr_70,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_eur_indiv_meta_egfr_70 <- n_class_eur[
  n_class_eur[["IID"]] %in% df_ntile_norm_meta_egfr_70[["IID"]],
]

summary(n_class_eur_indiv_meta_egfr_70[["COUNT_PGS"]])
summary(n_class_eur_indiv_meta_egfr_70[["STABILITY"]])
length(unique(n_class_eur_indiv_meta_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_eur whose IID occurs in df_ntile_norm_meta_egfr_50,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_eur_indiv_meta_egfr_50 <- n_class_eur[
  n_class_eur[["IID"]] %in% df_ntile_norm_meta_egfr_50[["IID"]],
]

summary(n_class_eur_indiv_meta_egfr_50[["COUNT_PGS"]])
summary(n_class_eur_indiv_meta_egfr_50[["STABILITY"]])
length(unique(n_class_eur_indiv_meta_egfr_50[["IID"]]))

### eur individuals multiancestry scores

#### eur egfr

In [None]:
# Keep the rows of n_class_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_95, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_eur_indiv_multiancestry_score_eur_egfr_95 <- n_class_eur_multiancestry_score[
  n_class_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_95[["IID"]],
]

summary(n_class_eur_indiv_multiancestry_score_eur_egfr_95[["COUNT_PGS"]])
summary(n_class_eur_indiv_multiancestry_score_eur_egfr_95[["STABILITY"]])
length(unique(n_class_eur_indiv_multiancestry_score_eur_egfr_95[["IID"]]))

In [None]:
# Keep the rows of n_class_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_90, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_eur_indiv_multiancestry_score_eur_egfr_90 <- n_class_eur_multiancestry_score[
  n_class_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_90[["IID"]],
]

summary(n_class_eur_indiv_multiancestry_score_eur_egfr_90[["COUNT_PGS"]])
summary(n_class_eur_indiv_multiancestry_score_eur_egfr_90[["STABILITY"]])
length(unique(n_class_eur_indiv_multiancestry_score_eur_egfr_90[["IID"]]))

In [None]:
# Keep the rows of n_class_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_80, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_eur_indiv_multiancestry_score_eur_egfr_80 <- n_class_eur_multiancestry_score[
  n_class_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_80[["IID"]],
]

summary(n_class_eur_indiv_multiancestry_score_eur_egfr_80[["COUNT_PGS"]])
summary(n_class_eur_indiv_multiancestry_score_eur_egfr_80[["STABILITY"]])
length(unique(n_class_eur_indiv_multiancestry_score_eur_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_70, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_eur_indiv_multiancestry_score_eur_egfr_70 <- n_class_eur_multiancestry_score[
  n_class_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_70[["IID"]],
]

summary(n_class_eur_indiv_multiancestry_score_eur_egfr_70[["COUNT_PGS"]])
summary(n_class_eur_indiv_multiancestry_score_eur_egfr_70[["STABILITY"]])
length(unique(n_class_eur_indiv_multiancestry_score_eur_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_50, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_eur_indiv_multiancestry_score_eur_egfr_50 <- n_class_eur_multiancestry_score[
  n_class_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_50[["IID"]],
]

summary(n_class_eur_indiv_multiancestry_score_eur_egfr_50[["COUNT_PGS"]])
summary(n_class_eur_indiv_multiancestry_score_eur_egfr_50[["STABILITY"]])
length(unique(n_class_eur_indiv_multiancestry_score_eur_egfr_50[["IID"]]))

#### meta egfr

In [None]:
# Keep the rows of n_class_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_95, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_eur_indiv_multiancestry_score_meta_egfr_95 <- n_class_eur_multiancestry_score[
  n_class_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_95[["IID"]],
]

summary(n_class_eur_indiv_multiancestry_score_meta_egfr_95[["COUNT_PGS"]])
summary(n_class_eur_indiv_multiancestry_score_meta_egfr_95[["STABILITY"]])
length(unique(n_class_eur_indiv_multiancestry_score_meta_egfr_95[["IID"]]))

In [None]:
# Keep the rows of n_class_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_90, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_eur_indiv_multiancestry_score_meta_egfr_90 <- n_class_eur_multiancestry_score[
  n_class_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_90[["IID"]],
]

summary(n_class_eur_indiv_multiancestry_score_meta_egfr_90[["COUNT_PGS"]])
summary(n_class_eur_indiv_multiancestry_score_meta_egfr_90[["STABILITY"]])
length(unique(n_class_eur_indiv_multiancestry_score_meta_egfr_90[["IID"]]))

In [None]:
# Keep the rows of n_class_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_80, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_eur_indiv_multiancestry_score_meta_egfr_80 <- n_class_eur_multiancestry_score[
  n_class_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_80[["IID"]],
]

summary(n_class_eur_indiv_multiancestry_score_meta_egfr_80[["COUNT_PGS"]])
summary(n_class_eur_indiv_multiancestry_score_meta_egfr_80[["STABILITY"]])
length(unique(n_class_eur_indiv_multiancestry_score_meta_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_70, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_eur_indiv_multiancestry_score_meta_egfr_70 <- n_class_eur_multiancestry_score[
  n_class_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_70[["IID"]],
]

summary(n_class_eur_indiv_multiancestry_score_meta_egfr_70[["COUNT_PGS"]])
summary(n_class_eur_indiv_multiancestry_score_meta_egfr_70[["STABILITY"]])
length(unique(n_class_eur_indiv_multiancestry_score_meta_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_50, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_eur_indiv_multiancestry_score_meta_egfr_50 <- n_class_eur_multiancestry_score[
  n_class_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_50[["IID"]],
]

summary(n_class_eur_indiv_multiancestry_score_meta_egfr_50[["COUNT_PGS"]])
summary(n_class_eur_indiv_multiancestry_score_meta_egfr_50[["STABILITY"]])
length(unique(n_class_eur_indiv_multiancestry_score_meta_egfr_50[["IID"]]))

### afr

#### afr egfr

In [None]:
# Keep the rows of n_class_afr whose IID occurs in df_ntile_norm_afr_egfr_80,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_afr_indiv_afr_egfr_80 <- n_class_afr[
  n_class_afr[["IID"]] %in% df_ntile_norm_afr_egfr_80[["IID"]],
]

summary(n_class_afr_indiv_afr_egfr_80[["COUNT_PGS"]])
summary(n_class_afr_indiv_afr_egfr_80[["STABILITY"]])
length(unique(n_class_afr_indiv_afr_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_afr whose IID occurs in df_ntile_norm_afr_egfr_70,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_afr_indiv_afr_egfr_70 <- n_class_afr[
  n_class_afr[["IID"]] %in% df_ntile_norm_afr_egfr_70[["IID"]],
]

summary(n_class_afr_indiv_afr_egfr_70[["COUNT_PGS"]])
summary(n_class_afr_indiv_afr_egfr_70[["STABILITY"]])
length(unique(n_class_afr_indiv_afr_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_afr whose IID occurs in df_ntile_norm_afr_egfr_50,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_afr_indiv_afr_egfr_50 <- n_class_afr[
  n_class_afr[["IID"]] %in% df_ntile_norm_afr_egfr_50[["IID"]],
]

summary(n_class_afr_indiv_afr_egfr_50[["COUNT_PGS"]])
summary(n_class_afr_indiv_afr_egfr_50[["STABILITY"]])
length(unique(n_class_afr_indiv_afr_egfr_50[["IID"]]))

#### meta egfr

In [None]:
# Keep the rows of n_class_afr whose IID occurs in df_ntile_norm_meta_egfr_80,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_afr_indiv_meta_egfr_80 <- n_class_afr[
  n_class_afr[["IID"]] %in% df_ntile_norm_meta_egfr_80[["IID"]],
]

summary(n_class_afr_indiv_meta_egfr_80[["COUNT_PGS"]])
summary(n_class_afr_indiv_meta_egfr_80[["STABILITY"]])
length(unique(n_class_afr_indiv_meta_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_afr whose IID occurs in df_ntile_norm_meta_egfr_70,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_afr_indiv_meta_egfr_70 <- n_class_afr[
  n_class_afr[["IID"]] %in% df_ntile_norm_meta_egfr_70[["IID"]],
]

summary(n_class_afr_indiv_meta_egfr_70[["COUNT_PGS"]])
summary(n_class_afr_indiv_meta_egfr_70[["STABILITY"]])
length(unique(n_class_afr_indiv_meta_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_afr whose IID occurs in df_ntile_norm_meta_egfr_50,
# then show COUNT_PGS and STABILITY summaries and the distinct-IID count.
n_class_afr_indiv_meta_egfr_50 <- n_class_afr[
  n_class_afr[["IID"]] %in% df_ntile_norm_meta_egfr_50[["IID"]],
]

summary(n_class_afr_indiv_meta_egfr_50[["COUNT_PGS"]])
summary(n_class_afr_indiv_meta_egfr_50[["STABILITY"]])
length(unique(n_class_afr_indiv_meta_egfr_50[["IID"]]))

### afr individuals multiancestry scores

#### afr egfr

In [None]:
# Keep the rows of n_class_afr_multiancestry_score whose IID occurs in
# df_ntile_norm_afr_egfr_80, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_afr_indiv_multiancestry_score_afr_egfr_80 <- n_class_afr_multiancestry_score[
  n_class_afr_multiancestry_score[["IID"]] %in% df_ntile_norm_afr_egfr_80[["IID"]],
]

summary(n_class_afr_indiv_multiancestry_score_afr_egfr_80[["COUNT_PGS"]])
summary(n_class_afr_indiv_multiancestry_score_afr_egfr_80[["STABILITY"]])
length(unique(n_class_afr_indiv_multiancestry_score_afr_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_afr_multiancestry_score whose IID occurs in
# df_ntile_norm_afr_egfr_70, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_afr_indiv_multiancestry_score_afr_egfr_70 <- n_class_afr_multiancestry_score[
  n_class_afr_multiancestry_score[["IID"]] %in% df_ntile_norm_afr_egfr_70[["IID"]],
]

summary(n_class_afr_indiv_multiancestry_score_afr_egfr_70[["COUNT_PGS"]])
summary(n_class_afr_indiv_multiancestry_score_afr_egfr_70[["STABILITY"]])
length(unique(n_class_afr_indiv_multiancestry_score_afr_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_afr_multiancestry_score whose IID occurs in
# df_ntile_norm_afr_egfr_50, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_afr_indiv_multiancestry_score_afr_egfr_50 <- n_class_afr_multiancestry_score[
  n_class_afr_multiancestry_score[["IID"]] %in% df_ntile_norm_afr_egfr_50[["IID"]],
]

summary(n_class_afr_indiv_multiancestry_score_afr_egfr_50[["COUNT_PGS"]])
summary(n_class_afr_indiv_multiancestry_score_afr_egfr_50[["STABILITY"]])
length(unique(n_class_afr_indiv_multiancestry_score_afr_egfr_50[["IID"]]))

#### meta egfr

In [None]:
# Keep the rows of n_class_afr_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_80, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_afr_indiv_multiancestry_score_meta_egfr_80 <- n_class_afr_multiancestry_score[
  n_class_afr_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_80[["IID"]],
]

summary(n_class_afr_indiv_multiancestry_score_meta_egfr_80[["COUNT_PGS"]])
summary(n_class_afr_indiv_multiancestry_score_meta_egfr_80[["STABILITY"]])
length(unique(n_class_afr_indiv_multiancestry_score_meta_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_afr_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_70, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_afr_indiv_multiancestry_score_meta_egfr_70 <- n_class_afr_multiancestry_score[
  n_class_afr_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_70[["IID"]],
]

summary(n_class_afr_indiv_multiancestry_score_meta_egfr_70[["COUNT_PGS"]])
summary(n_class_afr_indiv_multiancestry_score_meta_egfr_70[["STABILITY"]])
length(unique(n_class_afr_indiv_multiancestry_score_meta_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_afr_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_50, then show COUNT_PGS and STABILITY summaries and
# the distinct-IID count.
n_class_afr_indiv_multiancestry_score_meta_egfr_50 <- n_class_afr_multiancestry_score[
  n_class_afr_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_50[["IID"]],
]

summary(n_class_afr_indiv_multiancestry_score_meta_egfr_50[["COUNT_PGS"]])
summary(n_class_afr_indiv_multiancestry_score_meta_egfr_50[["STABILITY"]])
length(unique(n_class_afr_indiv_multiancestry_score_meta_egfr_50[["IID"]]))

## filter to high confidence individuals <= 2nd percentile

### all

#### eur egfr

In [None]:
# Keep the rows of n_class_low whose IID occurs in
# df_ntile_norm_eur_egfr_95_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_egfr_95 <- n_class_low[
  n_class_low[["IID"]] %in% df_ntile_norm_eur_egfr_95_low[["IID"]],
]

summary(n_class_low_eur_egfr_95[["COUNT_PGS"]])
summary(n_class_low_eur_egfr_95[["STABILITY"]])
length(unique(n_class_low_eur_egfr_95[["IID"]]))

In [None]:
# Keep the rows of n_class_low whose IID occurs in
# df_ntile_norm_eur_egfr_90_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_egfr_90 <- n_class_low[
  n_class_low[["IID"]] %in% df_ntile_norm_eur_egfr_90_low[["IID"]],
]

summary(n_class_low_eur_egfr_90[["COUNT_PGS"]])
summary(n_class_low_eur_egfr_90[["STABILITY"]])
length(unique(n_class_low_eur_egfr_90[["IID"]]))

In [None]:
# Keep the rows of n_class_low whose IID occurs in
# df_ntile_norm_eur_egfr_80_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_egfr_80 <- n_class_low[
  n_class_low[["IID"]] %in% df_ntile_norm_eur_egfr_80_low[["IID"]],
]

summary(n_class_low_eur_egfr_80[["COUNT_PGS"]])
summary(n_class_low_eur_egfr_80[["STABILITY"]])
length(unique(n_class_low_eur_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_low whose IID occurs in
# df_ntile_norm_eur_egfr_70_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_egfr_70 <- n_class_low[
  n_class_low[["IID"]] %in% df_ntile_norm_eur_egfr_70_low[["IID"]],
]

summary(n_class_low_eur_egfr_70[["COUNT_PGS"]])
summary(n_class_low_eur_egfr_70[["STABILITY"]])
length(unique(n_class_low_eur_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_low whose IID occurs in
# df_ntile_norm_eur_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_egfr_50 <- n_class_low[
  n_class_low[["IID"]] %in% df_ntile_norm_eur_egfr_50_low[["IID"]],
]

summary(n_class_low_eur_egfr_50[["COUNT_PGS"]])
summary(n_class_low_eur_egfr_50[["STABILITY"]])
length(unique(n_class_low_eur_egfr_50[["IID"]]))

#### afr egfr

In [None]:
# Keep the rows of n_class_low whose IID occurs in
# df_ntile_norm_afr_egfr_80_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_afr_egfr_80 <- n_class_low[
  n_class_low[["IID"]] %in% df_ntile_norm_afr_egfr_80_low[["IID"]],
]

summary(n_class_low_afr_egfr_80[["COUNT_PGS"]])
summary(n_class_low_afr_egfr_80[["STABILITY"]])
length(unique(n_class_low_afr_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_low whose IID occurs in
# df_ntile_norm_afr_egfr_70_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_afr_egfr_70 <- n_class_low[
  n_class_low[["IID"]] %in% df_ntile_norm_afr_egfr_70_low[["IID"]],
]

summary(n_class_low_afr_egfr_70[["COUNT_PGS"]])
summary(n_class_low_afr_egfr_70[["STABILITY"]])
length(unique(n_class_low_afr_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_low whose IID occurs in
# df_ntile_norm_afr_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_afr_egfr_50 <- n_class_low[
  n_class_low[["IID"]] %in% df_ntile_norm_afr_egfr_50_low[["IID"]],
]

summary(n_class_low_afr_egfr_50[["COUNT_PGS"]])
summary(n_class_low_afr_egfr_50[["STABILITY"]])
length(unique(n_class_low_afr_egfr_50[["IID"]]))

#### meta egfr

In [None]:
# Keep the rows of n_class_low whose IID occurs in
# df_ntile_norm_meta_egfr_95_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_meta_egfr_95 <- n_class_low[
  n_class_low[["IID"]] %in% df_ntile_norm_meta_egfr_95_low[["IID"]],
]

summary(n_class_low_meta_egfr_95[["COUNT_PGS"]])
summary(n_class_low_meta_egfr_95[["STABILITY"]])
length(unique(n_class_low_meta_egfr_95[["IID"]]))

In [None]:
# Keep the rows of n_class_low whose IID occurs in
# df_ntile_norm_meta_egfr_90_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_meta_egfr_90 <- n_class_low[
  n_class_low[["IID"]] %in% df_ntile_norm_meta_egfr_90_low[["IID"]],
]

summary(n_class_low_meta_egfr_90[["COUNT_PGS"]])
summary(n_class_low_meta_egfr_90[["STABILITY"]])
length(unique(n_class_low_meta_egfr_90[["IID"]]))

In [None]:
# Keep the rows of n_class_low whose IID occurs in
# df_ntile_norm_meta_egfr_80_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_meta_egfr_80 <- n_class_low[
  n_class_low[["IID"]] %in% df_ntile_norm_meta_egfr_80_low[["IID"]],
]

summary(n_class_low_meta_egfr_80[["COUNT_PGS"]])
summary(n_class_low_meta_egfr_80[["STABILITY"]])
length(unique(n_class_low_meta_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_low whose IID occurs in
# df_ntile_norm_meta_egfr_70_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_meta_egfr_70 <- n_class_low[
  n_class_low[["IID"]] %in% df_ntile_norm_meta_egfr_70_low[["IID"]],
]

summary(n_class_low_meta_egfr_70[["COUNT_PGS"]])
summary(n_class_low_meta_egfr_70[["STABILITY"]])
length(unique(n_class_low_meta_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_low whose IID occurs in
# df_ntile_norm_meta_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_meta_egfr_50 <- n_class_low[
  n_class_low[["IID"]] %in% df_ntile_norm_meta_egfr_50_low[["IID"]],
]

summary(n_class_low_meta_egfr_50[["COUNT_PGS"]])
summary(n_class_low_meta_egfr_50[["STABILITY"]])
length(unique(n_class_low_meta_egfr_50[["IID"]]))

### all individuals multiancestry scores

#### eur egfr

In [None]:
# Keep the rows of n_class_low_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_95_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_multiancestry_score_eur_egfr_95 <- n_class_low_multiancestry_score[
  n_class_low_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_95_low[["IID"]],
]

summary(n_class_low_multiancestry_score_eur_egfr_95[["COUNT_PGS"]])
summary(n_class_low_multiancestry_score_eur_egfr_95[["STABILITY"]])
length(unique(n_class_low_multiancestry_score_eur_egfr_95[["IID"]]))

In [None]:
# Keep the rows of n_class_low_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_90_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_multiancestry_score_eur_egfr_90 <- n_class_low_multiancestry_score[
  n_class_low_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_90_low[["IID"]],
]

summary(n_class_low_multiancestry_score_eur_egfr_90[["COUNT_PGS"]])
summary(n_class_low_multiancestry_score_eur_egfr_90[["STABILITY"]])
length(unique(n_class_low_multiancestry_score_eur_egfr_90[["IID"]]))

In [None]:
# Keep the rows of n_class_low_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_80_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_multiancestry_score_eur_egfr_80 <- n_class_low_multiancestry_score[
  n_class_low_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_80_low[["IID"]],
]

summary(n_class_low_multiancestry_score_eur_egfr_80[["COUNT_PGS"]])
summary(n_class_low_multiancestry_score_eur_egfr_80[["STABILITY"]])
length(unique(n_class_low_multiancestry_score_eur_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_low_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_70_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_multiancestry_score_eur_egfr_70 <- n_class_low_multiancestry_score[
  n_class_low_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_70_low[["IID"]],
]

summary(n_class_low_multiancestry_score_eur_egfr_70[["COUNT_PGS"]])
summary(n_class_low_multiancestry_score_eur_egfr_70[["STABILITY"]])
length(unique(n_class_low_multiancestry_score_eur_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_low_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_multiancestry_score_eur_egfr_50 <- n_class_low_multiancestry_score[
  n_class_low_multiancestry_score$IID %in% df_ntile_norm_eur_egfr_50_low$IID,
]

summary(n_class_low_multiancestry_score_eur_egfr_50$COUNT_PGS)
summary(n_class_low_multiancestry_score_eur_egfr_50$STABILITY)
# Count distinct IIDs in the _50 subset built in this cell, so the count
# describes the same table as the two summaries above.
length(unique(n_class_low_multiancestry_score_eur_egfr_50$IID))

#### afr egfr

In [None]:
# Keep the rows of n_class_low_multiancestry_score whose IID occurs in
# df_ntile_norm_afr_egfr_80_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_multiancestry_score_afr_egfr_80 <- n_class_low_multiancestry_score[
  n_class_low_multiancestry_score[["IID"]] %in% df_ntile_norm_afr_egfr_80_low[["IID"]],
]

summary(n_class_low_multiancestry_score_afr_egfr_80[["COUNT_PGS"]])
summary(n_class_low_multiancestry_score_afr_egfr_80[["STABILITY"]])
length(unique(n_class_low_multiancestry_score_afr_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_low_multiancestry_score whose IID occurs in
# df_ntile_norm_afr_egfr_70_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_multiancestry_score_afr_egfr_70 <- n_class_low_multiancestry_score[
  n_class_low_multiancestry_score[["IID"]] %in% df_ntile_norm_afr_egfr_70_low[["IID"]],
]

summary(n_class_low_multiancestry_score_afr_egfr_70[["COUNT_PGS"]])
summary(n_class_low_multiancestry_score_afr_egfr_70[["STABILITY"]])
length(unique(n_class_low_multiancestry_score_afr_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_low_multiancestry_score whose IID occurs in
# df_ntile_norm_afr_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_multiancestry_score_afr_egfr_50 <- n_class_low_multiancestry_score[
  n_class_low_multiancestry_score[["IID"]] %in% df_ntile_norm_afr_egfr_50_low[["IID"]],
]

summary(n_class_low_multiancestry_score_afr_egfr_50[["COUNT_PGS"]])
summary(n_class_low_multiancestry_score_afr_egfr_50[["STABILITY"]])
length(unique(n_class_low_multiancestry_score_afr_egfr_50[["IID"]]))

#### meta egfr

In [None]:
# Keep the rows of n_class_low_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_95_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_multiancestry_score_meta_egfr_95 <- n_class_low_multiancestry_score[
  n_class_low_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_95_low[["IID"]],
]

summary(n_class_low_multiancestry_score_meta_egfr_95[["COUNT_PGS"]])
summary(n_class_low_multiancestry_score_meta_egfr_95[["STABILITY"]])
length(unique(n_class_low_multiancestry_score_meta_egfr_95[["IID"]]))

In [None]:
# Keep the rows of n_class_low_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_90_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_multiancestry_score_meta_egfr_90 <- n_class_low_multiancestry_score[
  n_class_low_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_90_low[["IID"]],
]

summary(n_class_low_multiancestry_score_meta_egfr_90[["COUNT_PGS"]])
summary(n_class_low_multiancestry_score_meta_egfr_90[["STABILITY"]])
length(unique(n_class_low_multiancestry_score_meta_egfr_90[["IID"]]))

In [None]:
# Keep the rows of n_class_low_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_80_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_multiancestry_score_meta_egfr_80 <- n_class_low_multiancestry_score[
  n_class_low_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_80_low[["IID"]],
]

summary(n_class_low_multiancestry_score_meta_egfr_80[["COUNT_PGS"]])
summary(n_class_low_multiancestry_score_meta_egfr_80[["STABILITY"]])
length(unique(n_class_low_multiancestry_score_meta_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_low_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_70_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_multiancestry_score_meta_egfr_70 <- n_class_low_multiancestry_score[
  n_class_low_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_70_low[["IID"]],
]

summary(n_class_low_multiancestry_score_meta_egfr_70[["COUNT_PGS"]])
summary(n_class_low_multiancestry_score_meta_egfr_70[["STABILITY"]])
length(unique(n_class_low_multiancestry_score_meta_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_low_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_multiancestry_score_meta_egfr_50 <- n_class_low_multiancestry_score[
  n_class_low_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_50_low[["IID"]],
]

summary(n_class_low_multiancestry_score_meta_egfr_50[["COUNT_PGS"]])
summary(n_class_low_multiancestry_score_meta_egfr_50[["STABILITY"]])
length(unique(n_class_low_multiancestry_score_meta_egfr_50[["IID"]]))

### eur

#### eur egfr

In [None]:
# Keep the rows of n_class_low_eur whose IID occurs in
# df_ntile_norm_eur_egfr_95_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_eur_egfr_95 <- n_class_low_eur[
  n_class_low_eur[["IID"]] %in% df_ntile_norm_eur_egfr_95_low[["IID"]],
]

summary(n_class_low_eur_indiv_eur_egfr_95[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_eur_egfr_95[["STABILITY"]])
length(unique(n_class_low_eur_indiv_eur_egfr_95[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur whose IID occurs in
# df_ntile_norm_eur_egfr_90_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_eur_egfr_90 <- n_class_low_eur[
  n_class_low_eur[["IID"]] %in% df_ntile_norm_eur_egfr_90_low[["IID"]],
]

summary(n_class_low_eur_indiv_eur_egfr_90[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_eur_egfr_90[["STABILITY"]])
length(unique(n_class_low_eur_indiv_eur_egfr_90[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur whose IID occurs in
# df_ntile_norm_eur_egfr_80_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_eur_egfr_80 <- n_class_low_eur[
  n_class_low_eur[["IID"]] %in% df_ntile_norm_eur_egfr_80_low[["IID"]],
]

summary(n_class_low_eur_indiv_eur_egfr_80[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_eur_egfr_80[["STABILITY"]])
length(unique(n_class_low_eur_indiv_eur_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur whose IID occurs in
# df_ntile_norm_eur_egfr_70_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_eur_egfr_70 <- n_class_low_eur[
  n_class_low_eur[["IID"]] %in% df_ntile_norm_eur_egfr_70_low[["IID"]],
]

summary(n_class_low_eur_indiv_eur_egfr_70[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_eur_egfr_70[["STABILITY"]])
length(unique(n_class_low_eur_indiv_eur_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur whose IID occurs in
# df_ntile_norm_eur_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_eur_egfr_50 <- n_class_low_eur[
  n_class_low_eur[["IID"]] %in% df_ntile_norm_eur_egfr_50_low[["IID"]],
]

summary(n_class_low_eur_indiv_eur_egfr_50[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_eur_egfr_50[["STABILITY"]])
length(unique(n_class_low_eur_indiv_eur_egfr_50[["IID"]]))

#### meta egfr

In [None]:
# Keep the rows of n_class_low_eur whose IID occurs in
# df_ntile_norm_meta_egfr_95_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_meta_egfr_95 <- n_class_low_eur[
  n_class_low_eur[["IID"]] %in% df_ntile_norm_meta_egfr_95_low[["IID"]],
]

summary(n_class_low_eur_indiv_meta_egfr_95[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_meta_egfr_95[["STABILITY"]])
length(unique(n_class_low_eur_indiv_meta_egfr_95[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur whose IID occurs in
# df_ntile_norm_meta_egfr_90_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_meta_egfr_90 <- n_class_low_eur[
  n_class_low_eur[["IID"]] %in% df_ntile_norm_meta_egfr_90_low[["IID"]],
]

summary(n_class_low_eur_indiv_meta_egfr_90[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_meta_egfr_90[["STABILITY"]])
length(unique(n_class_low_eur_indiv_meta_egfr_90[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur whose IID occurs in
# df_ntile_norm_meta_egfr_80_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_meta_egfr_80 <- n_class_low_eur[
  n_class_low_eur[["IID"]] %in% df_ntile_norm_meta_egfr_80_low[["IID"]],
]

summary(n_class_low_eur_indiv_meta_egfr_80[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_meta_egfr_80[["STABILITY"]])
length(unique(n_class_low_eur_indiv_meta_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur whose IID occurs in
# df_ntile_norm_meta_egfr_70_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_meta_egfr_70 <- n_class_low_eur[
  n_class_low_eur[["IID"]] %in% df_ntile_norm_meta_egfr_70_low[["IID"]],
]

summary(n_class_low_eur_indiv_meta_egfr_70[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_meta_egfr_70[["STABILITY"]])
length(unique(n_class_low_eur_indiv_meta_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur whose IID occurs in
# df_ntile_norm_meta_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_meta_egfr_50 <- n_class_low_eur[
  n_class_low_eur[["IID"]] %in% df_ntile_norm_meta_egfr_50_low[["IID"]],
]

summary(n_class_low_eur_indiv_meta_egfr_50[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_meta_egfr_50[["STABILITY"]])
length(unique(n_class_low_eur_indiv_meta_egfr_50[["IID"]]))

### eur individuals multiancestry scores

#### eur egfr

In [None]:
# Keep the rows of n_class_low_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_95_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_multiancestry_score_eur_egfr_95 <- n_class_low_eur_multiancestry_score[
  n_class_low_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_95_low[["IID"]],
]

summary(n_class_low_eur_indiv_multiancestry_score_eur_egfr_95[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_multiancestry_score_eur_egfr_95[["STABILITY"]])
length(unique(n_class_low_eur_indiv_multiancestry_score_eur_egfr_95[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_90_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_multiancestry_score_eur_egfr_90 <- n_class_low_eur_multiancestry_score[
  n_class_low_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_90_low[["IID"]],
]

summary(n_class_low_eur_indiv_multiancestry_score_eur_egfr_90[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_multiancestry_score_eur_egfr_90[["STABILITY"]])
length(unique(n_class_low_eur_indiv_multiancestry_score_eur_egfr_90[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_80_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_multiancestry_score_eur_egfr_80 <- n_class_low_eur_multiancestry_score[
  n_class_low_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_80_low[["IID"]],
]

summary(n_class_low_eur_indiv_multiancestry_score_eur_egfr_80[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_multiancestry_score_eur_egfr_80[["STABILITY"]])
length(unique(n_class_low_eur_indiv_multiancestry_score_eur_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_70_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_multiancestry_score_eur_egfr_70 <- n_class_low_eur_multiancestry_score[
  n_class_low_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_70_low[["IID"]],
]

summary(n_class_low_eur_indiv_multiancestry_score_eur_egfr_70[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_multiancestry_score_eur_egfr_70[["STABILITY"]])
length(unique(n_class_low_eur_indiv_multiancestry_score_eur_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_eur_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_multiancestry_score_eur_egfr_50 <- n_class_low_eur_multiancestry_score[
  n_class_low_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_eur_egfr_50_low[["IID"]],
]

summary(n_class_low_eur_indiv_multiancestry_score_eur_egfr_50[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_multiancestry_score_eur_egfr_50[["STABILITY"]])
length(unique(n_class_low_eur_indiv_multiancestry_score_eur_egfr_50[["IID"]]))

#### meta egfr

In [None]:
# Keep the rows of n_class_low_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_95_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_multiancestry_score_meta_egfr_95 <- n_class_low_eur_multiancestry_score[
  n_class_low_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_95_low[["IID"]],
]

summary(n_class_low_eur_indiv_multiancestry_score_meta_egfr_95[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_multiancestry_score_meta_egfr_95[["STABILITY"]])
length(unique(n_class_low_eur_indiv_multiancestry_score_meta_egfr_95[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_90_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_multiancestry_score_meta_egfr_90 <- n_class_low_eur_multiancestry_score[
  n_class_low_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_90_low[["IID"]],
]

summary(n_class_low_eur_indiv_multiancestry_score_meta_egfr_90[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_multiancestry_score_meta_egfr_90[["STABILITY"]])
length(unique(n_class_low_eur_indiv_multiancestry_score_meta_egfr_90[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_80_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_multiancestry_score_meta_egfr_80 <- n_class_low_eur_multiancestry_score[
  n_class_low_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_80_low[["IID"]],
]

summary(n_class_low_eur_indiv_multiancestry_score_meta_egfr_80[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_multiancestry_score_meta_egfr_80[["STABILITY"]])
length(unique(n_class_low_eur_indiv_multiancestry_score_meta_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_70_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_multiancestry_score_meta_egfr_70 <- n_class_low_eur_multiancestry_score[
  n_class_low_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_70_low[["IID"]],
]

summary(n_class_low_eur_indiv_multiancestry_score_meta_egfr_70[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_multiancestry_score_meta_egfr_70[["STABILITY"]])
length(unique(n_class_low_eur_indiv_multiancestry_score_meta_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_low_eur_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_eur_indiv_multiancestry_score_meta_egfr_50 <- n_class_low_eur_multiancestry_score[
  n_class_low_eur_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_50_low[["IID"]],
]

summary(n_class_low_eur_indiv_multiancestry_score_meta_egfr_50[["COUNT_PGS"]])
summary(n_class_low_eur_indiv_multiancestry_score_meta_egfr_50[["STABILITY"]])
length(unique(n_class_low_eur_indiv_multiancestry_score_meta_egfr_50[["IID"]]))

### afr

#### afr egfr

In [None]:
# Keep the rows of n_class_low_afr whose IID occurs in
# df_ntile_norm_afr_egfr_80_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_afr_indiv_afr_egfr_80 <- n_class_low_afr[
  n_class_low_afr[["IID"]] %in% df_ntile_norm_afr_egfr_80_low[["IID"]],
]

summary(n_class_low_afr_indiv_afr_egfr_80[["COUNT_PGS"]])
summary(n_class_low_afr_indiv_afr_egfr_80[["STABILITY"]])
length(unique(n_class_low_afr_indiv_afr_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_low_afr whose IID occurs in
# df_ntile_norm_afr_egfr_70_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_afr_indiv_afr_egfr_70 <- n_class_low_afr[
  n_class_low_afr[["IID"]] %in% df_ntile_norm_afr_egfr_70_low[["IID"]],
]

summary(n_class_low_afr_indiv_afr_egfr_70[["COUNT_PGS"]])
summary(n_class_low_afr_indiv_afr_egfr_70[["STABILITY"]])
length(unique(n_class_low_afr_indiv_afr_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_low_afr whose IID occurs in
# df_ntile_norm_afr_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_afr_indiv_afr_egfr_50 <- n_class_low_afr[
  n_class_low_afr[["IID"]] %in% df_ntile_norm_afr_egfr_50_low[["IID"]],
]

summary(n_class_low_afr_indiv_afr_egfr_50[["COUNT_PGS"]])
summary(n_class_low_afr_indiv_afr_egfr_50[["STABILITY"]])
length(unique(n_class_low_afr_indiv_afr_egfr_50[["IID"]]))

#### meta egfr

In [None]:
# Keep the rows of n_class_low_afr whose IID occurs in
# df_ntile_norm_meta_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_afr_indiv_meta_egfr_50 <- n_class_low_afr[
  n_class_low_afr[["IID"]] %in% df_ntile_norm_meta_egfr_50_low[["IID"]],
]

summary(n_class_low_afr_indiv_meta_egfr_50[["COUNT_PGS"]])
summary(n_class_low_afr_indiv_meta_egfr_50[["STABILITY"]])
length(unique(n_class_low_afr_indiv_meta_egfr_50[["IID"]]))

### afr individuals multiancestry scores

#### afr egfr

In [None]:
# Keep the rows of n_class_low_afr_multiancestry_score whose IID occurs in
# df_ntile_norm_afr_egfr_80_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_afr_indiv_multiancestry_score_afr_egfr_80 <- n_class_low_afr_multiancestry_score[
  n_class_low_afr_multiancestry_score[["IID"]] %in% df_ntile_norm_afr_egfr_80_low[["IID"]],
]

summary(n_class_low_afr_indiv_multiancestry_score_afr_egfr_80[["COUNT_PGS"]])
summary(n_class_low_afr_indiv_multiancestry_score_afr_egfr_80[["STABILITY"]])
length(unique(n_class_low_afr_indiv_multiancestry_score_afr_egfr_80[["IID"]]))

In [None]:
# Keep the rows of n_class_low_afr_multiancestry_score whose IID occurs in
# df_ntile_norm_afr_egfr_70_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_afr_indiv_multiancestry_score_afr_egfr_70 <- n_class_low_afr_multiancestry_score[
  n_class_low_afr_multiancestry_score[["IID"]] %in% df_ntile_norm_afr_egfr_70_low[["IID"]],
]

summary(n_class_low_afr_indiv_multiancestry_score_afr_egfr_70[["COUNT_PGS"]])
summary(n_class_low_afr_indiv_multiancestry_score_afr_egfr_70[["STABILITY"]])
length(unique(n_class_low_afr_indiv_multiancestry_score_afr_egfr_70[["IID"]]))

In [None]:
# Keep the rows of n_class_low_afr_multiancestry_score whose IID occurs in
# df_ntile_norm_afr_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_afr_indiv_multiancestry_score_afr_egfr_50 <- n_class_low_afr_multiancestry_score[
  n_class_low_afr_multiancestry_score[["IID"]] %in% df_ntile_norm_afr_egfr_50_low[["IID"]],
]

summary(n_class_low_afr_indiv_multiancestry_score_afr_egfr_50[["COUNT_PGS"]])
summary(n_class_low_afr_indiv_multiancestry_score_afr_egfr_50[["STABILITY"]])
length(unique(n_class_low_afr_indiv_multiancestry_score_afr_egfr_50[["IID"]]))

#### meta egfr

In [None]:
# Keep the rows of n_class_low_afr_multiancestry_score whose IID occurs in
# df_ntile_norm_meta_egfr_50_low, then show COUNT_PGS and STABILITY summaries
# and the distinct-IID count.
n_class_low_afr_indiv_multiancestry_score_meta_egfr_50 <- n_class_low_afr_multiancestry_score[
  n_class_low_afr_multiancestry_score[["IID"]] %in% df_ntile_norm_meta_egfr_50_low[["IID"]],
]

summary(n_class_low_afr_indiv_multiancestry_score_meta_egfr_50[["COUNT_PGS"]])
summary(n_class_low_afr_indiv_multiancestry_score_meta_egfr_50[["STABILITY"]])
length(unique(n_class_low_afr_indiv_multiancestry_score_meta_egfr_50[["IID"]]))

## make plot inputs for >= 98th percentile

### all

#### eur egfr

In [None]:
# Build the long-format stability table for the EUR eGFR subsets.
# n_class supplies the 'ALL' column (its STABILITY, renamed); each
# n_class_eur_egfr_<NN> table is then full-joined on IID, its COUNT_PGS dropped
# and its STABILITY renamed to '<NN>%_CONF'. full_join keeps every IID from
# either side and fills unmatched cells with NA.
stability_plot_input_eur_egfr <- n_class %>%
  select(-c(COUNT_PGS)) %>%
  rename('ALL' = 'STABILITY') %>%
  full_join(n_class_eur_egfr_95, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_egfr_90, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_egfr_80, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_egfr_70, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_egfr_50, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  # One row per IID x threshold column.
  pivot_longer(
    cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'),
    names_to = "CONF_THRES",
    values_to = "STABILITY"
  ) %>%
  # factor() is required here: as.factor() has no `levels` argument and errors
  # with "unused argument" when one is supplied.
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'EUR.eGFR.PRScs')
head(stability_plot_input_eur_egfr)

#### afr egfr

In [None]:
# Build the long-format stability table for the AFR eGFR subsets.
# n_class supplies the 'ALL' column (its STABILITY, renamed); each
# n_class_afr_egfr_<NN> table is then full-joined on IID, its COUNT_PGS dropped
# and its STABILITY renamed to '<NN>%_CONF'. full_join keeps every IID from
# either side and fills unmatched cells with NA.
stability_plot_input_afr_egfr <- n_class %>%
  select(-c(COUNT_PGS)) %>%
  rename('ALL' = 'STABILITY') %>%
  full_join(n_class_afr_egfr_80, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_egfr_70, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_egfr_50, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  # One row per IID x threshold column.
  pivot_longer(
    cols = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'),
    names_to = "CONF_THRES",
    values_to = "STABILITY"
  ) %>%
  # factor() is required here: as.factor() has no `levels` argument and errors
  # with "unused argument" when one is supplied.
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'AFR.eGFR.PRScs')
head(stability_plot_input_afr_egfr)

#### meta egfr

In [None]:
# Build the long-format stability table for the META eGFR subsets.
# n_class supplies the 'ALL' column (its STABILITY, renamed); each
# n_class_meta_egfr_<NN> table is then full-joined on IID, its COUNT_PGS
# dropped and its STABILITY renamed to '<NN>%_CONF'. full_join keeps every IID
# from either side and fills unmatched cells with NA.
stability_plot_input_meta_egfr <- n_class %>%
  select(-c(COUNT_PGS)) %>%
  rename('ALL' = 'STABILITY') %>%
  full_join(n_class_meta_egfr_95, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_meta_egfr_90, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_meta_egfr_80, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_meta_egfr_70, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_meta_egfr_50, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  # One row per IID x threshold column.
  pivot_longer(
    cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'),
    names_to = "CONF_THRES",
    values_to = "STABILITY"
  ) %>%
  # factor() is required here: as.factor() has no `levels` argument and errors
  # with "unused argument" when one is supplied.
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META.eGFR.PRScsx')
head(stability_plot_input_meta_egfr)

#### combined

In [None]:
# Stack the EUR-, AFR- and META-trained tables row-wise into one plot input, then preview it.
stability_plot_input_comb = bind_rows(
  stability_plot_input_eur_egfr,
  stability_plot_input_afr_egfr,
  stability_plot_input_meta_egfr
)
head(stability_plot_input_comb)

### all individuals multiancestry scores

#### eur egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'EUR.eGFR.PRScs', built from
# n_class_multiancestry_score and its eur_egfr _95/_90/_80/_70/_50 variants,
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_multiancestry_score_eur_egfr = n_class_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_multiancestry_score_eur_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_multiancestry_score_eur_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_multiancestry_score_eur_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_multiancestry_score_eur_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_multiancestry_score_eur_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'EUR.eGFR.PRScs')
head(stability_plot_input_multiancestry_score_eur_egfr)

#### afr egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'AFR.eGFR.PRScs', built from
# n_class_multiancestry_score and its afr_egfr _80/_70/_50 variants,
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_multiancestry_score_afr_egfr = n_class_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_multiancestry_score_afr_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_multiancestry_score_afr_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_multiancestry_score_afr_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'AFR.eGFR.PRScs')
head(stability_plot_input_multiancestry_score_afr_egfr)

#### meta egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'META.eGFR.PRScsx', built from
# n_class_multiancestry_score and its meta_egfr _95/_90/_80/_70/_50 variants,
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_multiancestry_score_meta_egfr = n_class_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_multiancestry_score_meta_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_multiancestry_score_meta_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_multiancestry_score_meta_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_multiancestry_score_meta_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_multiancestry_score_meta_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META.eGFR.PRScsx')
head(stability_plot_input_multiancestry_score_meta_egfr)

#### combined

In [None]:
# Stack the EUR-, AFR- and META-trained multiancestry-score tables row-wise, then preview.
stability_plot_input_multiancestry_score_comb = bind_rows(
  stability_plot_input_multiancestry_score_eur_egfr,
  stability_plot_input_multiancestry_score_afr_egfr,
  stability_plot_input_multiancestry_score_meta_egfr
)
head(stability_plot_input_multiancestry_score_comb)

### eur

#### eur egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'EUR.eGFR.PRScs', built from n_class_eur and
# its eur_indiv_eur_egfr _95/_90/_80/_70/_50 variants, reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_eur_indiv_eur_egfr = n_class_eur %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_eur_indiv_eur_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_eur_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_eur_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_eur_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_eur_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'EUR.eGFR.PRScs')
head(stability_plot_input_eur_indiv_eur_egfr)

#### meta egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'META.eGFR.PRScsx', built from n_class_eur and
# its eur_indiv_meta_egfr _95/_90/_80/_70/_50 variants, reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_eur_indiv_meta_egfr = n_class_eur %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_eur_indiv_meta_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_meta_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_meta_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_meta_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_meta_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META.eGFR.PRScsx')
head(stability_plot_input_eur_indiv_meta_egfr)

#### combined

In [None]:
# Stack the EUR- and META-trained tables for the n_class_eur-based inputs row-wise, then preview.
stability_plot_input_eur_indiv_comb = bind_rows(
  stability_plot_input_eur_indiv_eur_egfr,
  stability_plot_input_eur_indiv_meta_egfr
)
head(stability_plot_input_eur_indiv_comb)

### eur individuals multiancestry scores

#### eur egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'EUR.eGFR.PRScs', built from
# n_class_eur_multiancestry_score and its eur_egfr _95/_90/_80/_70/_50 variants,
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_eur_indiv_multiancestry_score_eur_egfr = n_class_eur_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_eur_indiv_multiancestry_score_eur_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_eur_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_eur_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_eur_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_eur_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'EUR.eGFR.PRScs')
head(stability_plot_input_eur_indiv_multiancestry_score_eur_egfr)

#### meta egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'META.eGFR.PRScsx', built from
# n_class_eur_multiancestry_score and its meta_egfr _95/_90/_80/_70/_50 variants,
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_eur_indiv_multiancestry_score_meta_egfr = n_class_eur_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_eur_indiv_multiancestry_score_meta_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_meta_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_meta_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_meta_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_meta_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META.eGFR.PRScsx')
head(stability_plot_input_eur_indiv_multiancestry_score_meta_egfr)

#### combined

In [None]:
# Stack the EUR- and META-trained multiancestry-score tables row-wise, then preview.
stability_plot_input_eur_indiv_multiancestry_score_comb = bind_rows(
  stability_plot_input_eur_indiv_multiancestry_score_eur_egfr,
  stability_plot_input_eur_indiv_multiancestry_score_meta_egfr
)
head(stability_plot_input_eur_indiv_multiancestry_score_comb)

### afr

#### afr egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'AFR.eGFR.PRScs', built from n_class_afr and
# its afr_indiv_afr_egfr _80/_70/_50 variants, reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_afr_indiv_afr_egfr = n_class_afr %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_afr_indiv_afr_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_afr_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_afr_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'AFR.eGFR.PRScs')
head(stability_plot_input_afr_indiv_afr_egfr)

#### meta egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'META.eGFR.PRScsx', built from n_class_afr and
# its afr_indiv_meta_egfr _70/_50 variants (only these two thresholds are joined here),
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_afr_indiv_meta_egfr = n_class_afr %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_afr_indiv_meta_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_meta_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META.eGFR.PRScsx')
head(stability_plot_input_afr_indiv_meta_egfr)

#### combined

In [None]:
# Stack the AFR- and META-trained tables for the n_class_afr-based inputs row-wise, then preview.
stability_plot_input_afr_indiv_comb = bind_rows(
  stability_plot_input_afr_indiv_afr_egfr,
  stability_plot_input_afr_indiv_meta_egfr
)
head(stability_plot_input_afr_indiv_comb)

### afr individuals multiancestry scores

#### afr egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'AFR.eGFR.PRScs', built from
# n_class_afr_multiancestry_score and its afr_egfr _80/_70/_50 variants,
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_afr_indiv_multiancestry_score_afr_egfr = n_class_afr_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_afr_indiv_multiancestry_score_afr_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_multiancestry_score_afr_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_multiancestry_score_afr_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'AFR.eGFR.PRScs')
head(stability_plot_input_afr_indiv_multiancestry_score_afr_egfr)

#### meta egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'META.eGFR.PRScsx', built from
# n_class_afr_multiancestry_score and its meta_egfr _70/_50 variants (only these two
# thresholds are joined here), reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_afr_indiv_multiancestry_score_meta_egfr = n_class_afr_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_afr_indiv_multiancestry_score_meta_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_multiancestry_score_meta_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META.eGFR.PRScsx')
head(stability_plot_input_afr_indiv_multiancestry_score_meta_egfr)

#### combined

In [None]:
# Stack the AFR- and META-trained multiancestry-score tables row-wise, then preview.
stability_plot_input_afr_indiv_multiancestry_score_comb = bind_rows(
  stability_plot_input_afr_indiv_multiancestry_score_afr_egfr,
  stability_plot_input_afr_indiv_multiancestry_score_meta_egfr
)
head(stability_plot_input_afr_indiv_multiancestry_score_comb)

## make plot inputs for <= 2nd percentile

### all

#### eur egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'EUR.eGFR.PRScs', built from n_class_low and
# its eur_egfr _95/_90/_80/_70/_50 variants, reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_eur_egfr_low = n_class_low %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_eur_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'EUR.eGFR.PRScs')
head(stability_plot_input_eur_egfr_low)

#### afr egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'AFR.eGFR.PRScs', built from n_class_low and
# its afr_egfr _80/_70/_50 variants, reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_afr_egfr_low = n_class_low %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_afr_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_afr_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_afr_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'AFR.eGFR.PRScs')
head(stability_plot_input_afr_egfr_low)

#### meta egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'META.eGFR.PRScsx', built from n_class_low and
# its meta_egfr _95/_90/_80/_70/_50 variants, reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_meta_egfr_low = n_class_low %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_meta_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_meta_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_meta_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_meta_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_meta_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META.eGFR.PRScsx')
head(stability_plot_input_meta_egfr_low)

#### combined

In [None]:
# Stack the EUR-, AFR- and META-trained *_low tables row-wise into one plot input, then preview.
stability_plot_input_comb_low = bind_rows(
  stability_plot_input_eur_egfr_low,
  stability_plot_input_afr_egfr_low,
  stability_plot_input_meta_egfr_low
)
head(stability_plot_input_comb_low)

### all individuals multiancestry scores

#### eur egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'EUR.eGFR.PRScs', built from
# n_class_low_multiancestry_score and its eur_egfr _95/_90/_80/_70/_50 variants,
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_multiancestry_score_eur_egfr_low = n_class_low_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_multiancestry_score_eur_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_multiancestry_score_eur_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_multiancestry_score_eur_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_multiancestry_score_eur_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_multiancestry_score_eur_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'EUR.eGFR.PRScs')
head(stability_plot_input_multiancestry_score_eur_egfr_low)

#### afr egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'AFR.eGFR.PRScs', built from
# n_class_low_multiancestry_score and its afr_egfr _80/_70/_50 variants,
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_multiancestry_score_afr_egfr_low = n_class_low_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_multiancestry_score_afr_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_multiancestry_score_afr_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_multiancestry_score_afr_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'AFR.eGFR.PRScs')
head(stability_plot_input_multiancestry_score_afr_egfr_low)

#### meta egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'META.eGFR.PRScsx', built from
# n_class_low_multiancestry_score and its meta_egfr _95/_90/_80/_70/_50 variants,
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_multiancestry_score_meta_egfr_low = n_class_low_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_multiancestry_score_meta_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_multiancestry_score_meta_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_multiancestry_score_meta_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_multiancestry_score_meta_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_multiancestry_score_meta_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META.eGFR.PRScsx')
head(stability_plot_input_multiancestry_score_meta_egfr_low)

#### combined

In [None]:
# Stack the EUR-, AFR- and META-trained multiancestry-score *_low tables row-wise, then preview.
stability_plot_input_multiancestry_score_comb_low = bind_rows(
  stability_plot_input_multiancestry_score_eur_egfr_low,
  stability_plot_input_multiancestry_score_afr_egfr_low,
  stability_plot_input_multiancestry_score_meta_egfr_low
)
head(stability_plot_input_multiancestry_score_comb_low)

### eur

#### eur egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'EUR.eGFR.PRScs', built from n_class_low_eur and
# its eur_indiv_eur_egfr _95/_90/_80/_70/_50 variants, reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_eur_indiv_eur_egfr_low = n_class_low_eur %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_eur_indiv_eur_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_eur_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_eur_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_eur_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_eur_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'EUR.eGFR.PRScs')
head(stability_plot_input_eur_indiv_eur_egfr_low)

#### meta egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'META.eGFR.PRScsx', built from n_class_low_eur and
# its eur_indiv_meta_egfr _95/_90/_80/_70/_50 variants, reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_eur_indiv_meta_egfr_low = n_class_low_eur %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_eur_indiv_meta_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_meta_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_meta_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_meta_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_meta_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META.eGFR.PRScsx')
head(stability_plot_input_eur_indiv_meta_egfr_low)

#### combined

In [None]:
# Stack the EUR- and META-trained *_low tables row-wise, list the training groups
# present in the result, then preview it.
stability_plot_input_eur_indiv_comb_low = bind_rows(
  stability_plot_input_eur_indiv_eur_egfr_low,
  stability_plot_input_eur_indiv_meta_egfr_low
)
unique(stability_plot_input_eur_indiv_comb_low$TRAINING_GROUP)
head(stability_plot_input_eur_indiv_comb_low)

### eur individuals multiancestry scores

#### eur egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'EUR.eGFR.PRScs', built from
# n_class_low_eur_multiancestry_score and its eur_egfr _95/_90/_80/_70/_50 variants,
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_eur_indiv_multiancestry_score_eur_egfr_low = n_class_low_eur_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_eur_indiv_multiancestry_score_eur_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_multiancestry_score_eur_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_multiancestry_score_eur_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_multiancestry_score_eur_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_multiancestry_score_eur_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'EUR.eGFR.PRScs')
head(stability_plot_input_eur_indiv_multiancestry_score_eur_egfr_low)

#### meta egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'META.eGFR.PRScsx', built from
# n_class_low_eur_multiancestry_score and its meta_egfr _95/_90/_80/_70/_50 variants,
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_eur_indiv_multiancestry_score_meta_egfr_low = n_class_low_eur_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_eur_indiv_multiancestry_score_meta_egfr_95, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_multiancestry_score_meta_egfr_90, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_multiancestry_score_meta_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_multiancestry_score_meta_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_eur_indiv_multiancestry_score_meta_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META.eGFR.PRScsx')
head(stability_plot_input_eur_indiv_multiancestry_score_meta_egfr_low)

#### combined

In [None]:
# Stack the EUR- and META-trained multiancestry-score *_low tables row-wise, then preview.
stability_plot_input_eur_indiv_multiancestry_score_comb_low = bind_rows(
  stability_plot_input_eur_indiv_multiancestry_score_eur_egfr_low,
  stability_plot_input_eur_indiv_multiancestry_score_meta_egfr_low
)
head(stability_plot_input_eur_indiv_multiancestry_score_comb_low)

### afr

#### afr egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'AFR.eGFR.PRScs', built from n_class_low_afr and
# its afr_indiv_afr_egfr _80/_70/_50 variants, reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_afr_indiv_afr_egfr_low = n_class_low_afr %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_afr_indiv_afr_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_afr_indiv_afr_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_afr_indiv_afr_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'AFR.eGFR.PRScs')
head(stability_plot_input_afr_indiv_afr_egfr_low)

#### meta egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'META.eGFR.PRScsx', built from n_class_low_afr and
# its afr_indiv_meta_egfr _50 variant (the only threshold joined here),
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_afr_indiv_meta_egfr_low = n_class_low_afr %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after the join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_afr_indiv_meta_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META.eGFR.PRScsx')
head(stability_plot_input_afr_indiv_meta_egfr_low)

#### combined

In [None]:
# Stack the AFR- and META-trained *_low tables row-wise, then preview.
stability_plot_input_afr_indiv_comb_low = bind_rows(
  stability_plot_input_afr_indiv_afr_egfr_low,
  stability_plot_input_afr_indiv_meta_egfr_low
)
head(stability_plot_input_afr_indiv_comb_low)

### afr individuals multiancestry scores

#### afr egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'AFR.eGFR.PRScs', built from
# n_class_low_afr_multiancestry_score and its afr_egfr _80/_70/_50 variants,
# reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_afr_indiv_multiancestry_score_afr_egfr_low = n_class_low_afr_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after each join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_afr_indiv_multiancestry_score_afr_egfr_80, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_afr_indiv_multiancestry_score_afr_egfr_70, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_low_afr_indiv_multiancestry_score_afr_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'AFR.eGFR.PRScs')
head(stability_plot_input_afr_indiv_multiancestry_score_afr_egfr_low)

#### meta egfr

In [None]:
# Stability plot input for TRAINING_GROUP 'META.eGFR.PRScsx', built from
# n_class_low_afr_multiancestry_score and its meta_egfr _50 variant (the only threshold
# joined here), reshaped to one row per input row and threshold.
# NOTE(review): assumes every n_class* table has IID, COUNT_PGS and STABILITY columns - confirm.
stability_plot_input_afr_indiv_multiancestry_score_meta_egfr_low = n_class_low_afr_multiancestry_score %>%
  select(-COUNT_PGS) %>%
  rename('ALL' = 'STABILITY') %>%
  # after the join: drop the joined COUNT_PGS, then label the joined STABILITY by its threshold
  full_join(n_class_low_afr_indiv_multiancestry_score_meta_egfr_50, by = 'IID') %>%
  select(-COUNT_PGS) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor() is required here: as.factor() takes no `levels` argument
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META.eGFR.PRScsx')
head(stability_plot_input_afr_indiv_multiancestry_score_meta_egfr_low)

#### combined

In [None]:
# Stack the AFR- and META-trained multiancestry-score *_low tables row-wise, then preview.
stability_plot_input_afr_indiv_multiancestry_score_comb_low = bind_rows(
  stability_plot_input_afr_indiv_multiancestry_score_afr_egfr_low,
  stability_plot_input_afr_indiv_multiancestry_score_meta_egfr_low
)
head(stability_plot_input_afr_indiv_multiancestry_score_comb_low)

## make plots - top 98th percentile

### all

#### combined

In [None]:
# Boxplots of STABILITY per CONF_THRES, filled by TRAINING_GROUP.
# The x axis uses the CONF_THRES factor levels in reversed order.
ggplot(
  data = stability_plot_input_comb,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_comb$CONF_THRES)))
  ) +
  labs(
    title = 'PMBB v3 ALL CKD Confidence Thresholding',
    x = 'Confidence Threshold',
    y = 'Stability',
    fill = 'Training Group'
  )
# no plot is passed, so ggsave() writes the most recent ggplot (the one above)
ggsave(filename = 'output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.confidence_thresholding.png')

### all individuals multiancestry scores

#### combined

In [None]:
# Boxplots of STABILITY per CONF_THRES, filled by TRAINING_GROUP, for the
# multiancestry-score input. The x axis uses the factor levels in reversed order.
ggplot(
  data = stability_plot_input_multiancestry_score_comb,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_multiancestry_score_comb$CONF_THRES)))
  ) +
  labs(
    title = 'PMBB v3 ALL CKD Confidence Thresholding',
    x = 'Confidence Threshold',
    y = 'Stability',
    fill = 'Training Group'
  )
# no plot is passed, so ggsave() writes the most recent ggplot (the one above)
ggsave(filename = 'output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.multiancestry_scores.confidence_thresholding.png')

### eur

#### combined

In [None]:
# Boxplots of STABILITY per CONF_THRES, filled by TRAINING_GROUP, for the EUR input.
# The x axis uses the CONF_THRES factor levels in reversed order.
ggplot(
  data = stability_plot_input_eur_indiv_comb,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_eur_indiv_comb$CONF_THRES)))
  ) +
  labs(
    title = 'PMBB v3 EUR CKD Confidence Thresholding',
    x = 'Confidence Threshold',
    y = 'Stability',
    fill = 'Training Group'
  )
# no plot is passed, so ggsave() writes the most recent ggplot (the one above)
ggsave(filename = 'output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.confidence_thresholding.png')

### eur individuals multiancestry scores

#### combined

In [None]:
# Boxplots of STABILITY per CONF_THRES, filled by TRAINING_GROUP, for the EUR
# multiancestry-score input. The x axis uses the factor levels in reversed order.
ggplot(
  data = stability_plot_input_eur_indiv_multiancestry_score_comb,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_eur_indiv_multiancestry_score_comb$CONF_THRES)))
  ) +
  labs(
    title = 'PMBB v3 EUR CKD Confidence Thresholding',
    x = 'Confidence Threshold',
    y = 'Stability',
    fill = 'Training Group'
  )
# no plot is passed, so ggsave() writes the most recent ggplot (the one above)
ggsave(filename = 'output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.multiancestry_scores.confidence_thresholding.png')

### afr

#### combined

In [None]:
# Boxplots of STABILITY per CONF_THRES, filled by TRAINING_GROUP, for the AFR input.
# The x axis uses the CONF_THRES factor levels in reversed order.
ggplot(
  data = stability_plot_input_afr_indiv_comb,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_afr_indiv_comb$CONF_THRES)))
  ) +
  labs(
    title = 'PMBB v3 AFR CKD Confidence Thresholding',
    x = 'Confidence Threshold',
    y = 'Stability',
    fill = 'Training Group'
  )
# no plot is passed, so ggsave() writes the most recent ggplot (the one above)
ggsave(filename = 'output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.confidence_thresholding.png')

### afr individuals multiancestry scores

#### combined

In [None]:
# Boxplots of STABILITY per CONF_THRES, filled by TRAINING_GROUP, for the AFR
# multiancestry-score input. The x axis uses the factor levels in reversed order.
ggplot(
  data = stability_plot_input_afr_indiv_multiancestry_score_comb,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_afr_indiv_multiancestry_score_comb$CONF_THRES)))
  ) +
  labs(
    title = 'PMBB v3 AFR CKD Confidence Thresholding',
    x = 'Confidence Threshold',
    y = 'Stability',
    fill = 'Training Group'
  )
# no plot is passed, so ggsave() writes the most recent ggplot (the one above)
ggsave(filename = 'output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.multiancestry_scores.confidence_thresholding.png')

## make plots for <= 2nd percentile

### all

#### combined

In [None]:
# Box plot of STABILITY per CONF_THRES level, filled by TRAINING_GROUP, with
# the threshold levels shown in reverse order; ggsave() then writes the last
# plot to the PNG below.
ggplot(
  data = stability_plot_input_comb_low,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_comb_low$CONF_THRES)))
  ) +
  labs(
    title = 'PMBB v3 ALL CKD Confidence Thresholding- <= 2nd Percentile',
    x = 'Confidence Threshold',
    y = 'Stability',
    fill = 'Training Group'
  )
ggsave(filename = 'output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.2nd_percentile.confidence_thresholding.png')

### all individuals multiancestry scores

#### combined

In [None]:
# Box plot of STABILITY per CONF_THRES level, filled by TRAINING_GROUP, with
# the threshold levels shown in reverse order; ggsave() then writes the last
# plot to the PNG below.
# NOTE(review): title is identical to the non-multiancestry ALL plot — confirm intended.
ggplot(
  data = stability_plot_input_multiancestry_score_comb_low,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_multiancestry_score_comb_low$CONF_THRES)))
  ) +
  labs(
    title = 'PMBB v3 ALL CKD Confidence Thresholding- <= 2nd Percentile',
    x = 'Confidence Threshold',
    y = 'Stability',
    fill = 'Training Group'
  )
ggsave(filename = 'output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.multiancestry_scores.2nd_percentile.confidence_thresholding.png')

### eur

#### combined

In [None]:
# Box plot of STABILITY per CONF_THRES level, filled by TRAINING_GROUP, with
# the threshold levels shown in reverse order; ggsave() then writes the last
# plot to the PNG below.
ggplot(
  data = stability_plot_input_eur_indiv_comb_low,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_eur_indiv_comb_low$CONF_THRES)))
  ) +
  labs(
    title = 'PMBB v3 EUR CKD Confidence Thresholding- <= 2nd Percentile',
    x = 'Confidence Threshold',
    y = 'Stability',
    fill = 'Training Group'
  )
ggsave(filename = 'output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.2nd_percentile.confidence_thresholding.png')

### eur individuals multiancestry scores

#### combined

In [None]:
# Box plot of STABILITY per CONF_THRES level, filled by TRAINING_GROUP, with
# the threshold levels shown in reverse order; ggsave() then writes the last
# plot to the PNG below.
# NOTE(review): title is identical to the non-multiancestry EUR plot — confirm intended.
ggplot(
  data = stability_plot_input_eur_indiv_multiancestry_score_comb_low,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_eur_indiv_multiancestry_score_comb_low$CONF_THRES)))
  ) +
  labs(
    title = 'PMBB v3 EUR CKD Confidence Thresholding- <= 2nd Percentile',
    x = 'Confidence Threshold',
    y = 'Stability',
    fill = 'Training Group'
  )
ggsave(filename = 'output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.multiancestry_scores.2nd_percentile.confidence_thresholding.png')

### afr

#### combined

In [None]:
# Box plot of STABILITY per CONF_THRES level, filled by TRAINING_GROUP, with
# the threshold levels shown in reverse order; ggsave() then writes the last
# plot to the PNG below.
ggplot(
  data = stability_plot_input_afr_indiv_comb_low,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_afr_indiv_comb_low$CONF_THRES)))
  ) +
  labs(
    title = 'PMBB v3 AFR CKD Confidence Thresholding- <= 2nd Percentile',
    x = 'Confidence Threshold',
    y = 'Stability',
    fill = 'Training Group'
  )
ggsave(filename = 'output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.2nd_percentile.confidence_thresholding.png')

### afr individuals multiancestry scores

#### combined

In [None]:
# Box plot of STABILITY per CONF_THRES level, filled by TRAINING_GROUP, with
# the threshold levels shown in reverse order; ggsave() then writes the last
# plot to the PNG below.
# NOTE(review): title is identical to the non-multiancestry AFR plot — confirm intended.
ggplot(
  data = stability_plot_input_afr_indiv_multiancestry_score_comb_low,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_afr_indiv_multiancestry_score_comb_low$CONF_THRES)))
  ) +
  labs(
    title = 'PMBB v3 AFR CKD Confidence Thresholding- <= 2nd Percentile',
    x = 'Confidence Threshold',
    y = 'Stability',
    fill = 'Training Group'
  )
ggsave(filename = 'output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.multiancestry_scores.2nd_percentile.confidence_thresholding.png')

# compute ICC

## read in input files (if needed)

### all

In [None]:
# Reload the wide-form individual percentile table (ALL) from its TSV file.
# NOTE(review): path is tagged PMBB_v2 while the ICC output derived from this
# table is written under a PMBB_v3 name — confirm this is the intended input.
df_ntile_norm_wide <- fread(
  'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.individual_percentile.wide_form.txt',
  sep = '\t'
)

In [None]:
# Reload the per-model performance metrics table (ALL) from its TSV file.
# NOTE(review): path is tagged PMBB_v2 while the ICC output derived from this
# table is written under a PMBB_v3 name — confirm this is the intended input.
model_metrics_df <- read.csv(
  'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt',
  sep = '\t'
)

### all individuals multiancestry scores

In [None]:
# Reload the wide-form individual percentile table (ALL, multiancestry scores).
# NOTE(review): path is tagged PMBB_v2 while the ICC output derived from this
# table is written under a PMBB_v3 name — confirm this is the intended input.
df_ntile_norm_wide_multiancestry_score <- fread(
  'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt',
  sep = '\t'
)

In [None]:
# Reload the per-model performance metrics table (ALL, multiancestry scores).
# NOTE(review): path is tagged PMBB_v2 while the ICC output derived from this
# table is written under a PMBB_v3 name — confirm this is the intended input.
model_metrics_df_multiancestry_score <- read.csv(
  'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt',
  sep = '\t'
)

### eur

In [None]:
# Reload the wide-form individual percentile table (EUR) from its TSV file.
# NOTE(review): path is tagged PMBB_v2 while the ICC output derived from this
# table is written under a PMBB_v3 name — confirm this is the intended input.
df_ntile_norm_wide_eur <- fread(
  'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.individual_percentile.wide_form.txt',
  sep = '\t'
)

In [None]:
# Reload the per-model performance metrics table (EUR) from its TSV file.
# NOTE(review): path is tagged PMBB_v2 while the ICC output derived from this
# table is written under a PMBB_v3 name — confirm this is the intended input.
model_metrics_df_eur <- read.csv(
  'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt',
  sep = '\t'
)

### eur individuals multiancestry scores

In [None]:
# Reload the wide-form individual percentile table (EUR, multiancestry scores).
# NOTE(review): path is tagged PMBB_v2 while the ICC output derived from this
# table is written under a PMBB_v3 name — confirm this is the intended input.
df_ntile_norm_wide_eur_multiancestry_score <- fread(
  'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt',
  sep = '\t'
)

In [None]:
# Reload the per-model performance metrics table (EUR, multiancestry scores).
# NOTE(review): path is tagged PMBB_v2 while the ICC output derived from this
# table is written under a PMBB_v3 name — confirm this is the intended input.
model_metrics_df_eur_multiancestry_score <- read.csv(
  'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt',
  sep = '\t'
)

### afr

In [None]:
# Reload the wide-form individual percentile table (AFR) from its TSV file.
# NOTE(review): path is tagged PMBB_v2 while the ICC output derived from this
# table is written under a PMBB_v3 name — confirm this is the intended input.
df_ntile_norm_wide_afr <- fread(
  'output/performance_metrics/PMBB_v2.AFR.CKD.PGS_Znorm2.individual_percentile.wide_form.txt',
  sep = '\t'
)

In [None]:
# Reload the per-model performance metrics table (AFR) from its TSV file.
# NOTE(review): path is tagged PMBB_v2 while the ICC output derived from this
# table is written under a PMBB_v3 name — confirm this is the intended input.
model_metrics_df_afr <- read.csv(
  'output/performance_metrics/PMBB_v2.AFR.CKD.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt',
  sep = '\t'
)

### afr individuals multiancestry scores

In [None]:
# Reload the wide-form individual percentile table (AFR, multiancestry scores).
# NOTE(review): path is tagged PMBB_v2 while the ICC output derived from this
# table is written under a PMBB_v3 name — confirm this is the intended input.
df_ntile_norm_wide_afr_multiancestry_score <- fread(
  'output/performance_metrics/PMBB_v2.AFR.CKD.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt',
  sep = '\t'
)

In [None]:
# Reload the per-model performance metrics table (AFR, multiancestry scores).
# NOTE(review): path is tagged PMBB_v2 while the ICC output derived from this
# table is written under a PMBB_v3 name — confirm this is the intended input.
model_metrics_df_afr_multiancestry_score <- read.csv(
  'output/performance_metrics/PMBB_v2.AFR.CKD.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt',
  sep = '\t'
)

### all

#### eur egfr

In [None]:
# Keep the rows of `df_ntile_norm_wide` whose IID also occurs in
# `df_ntile_norm_eur_egfr_95`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_eur_egfr_95 <- df_ntile_norm_wide[
  df_ntile_norm_wide$IID %in% df_ntile_norm_eur_egfr_95$IID,
]
nrow(df_ntile_norm_wide_all_indiv_eur_egfr_95)

In [None]:
# Keep the rows of `df_ntile_norm_wide` whose IID also occurs in
# `df_ntile_norm_eur_egfr_90`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_eur_egfr_90 <- df_ntile_norm_wide[
  df_ntile_norm_wide$IID %in% df_ntile_norm_eur_egfr_90$IID,
]
nrow(df_ntile_norm_wide_all_indiv_eur_egfr_90)

In [None]:
# Keep the rows of `df_ntile_norm_wide` whose IID also occurs in
# `df_ntile_norm_eur_egfr_80`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_eur_egfr_80 <- df_ntile_norm_wide[
  df_ntile_norm_wide$IID %in% df_ntile_norm_eur_egfr_80$IID,
]
nrow(df_ntile_norm_wide_all_indiv_eur_egfr_80)

In [None]:
# Keep the rows of `df_ntile_norm_wide` whose IID also occurs in
# `df_ntile_norm_eur_egfr_70`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_eur_egfr_70 <- df_ntile_norm_wide[
  df_ntile_norm_wide$IID %in% df_ntile_norm_eur_egfr_70$IID,
]
nrow(df_ntile_norm_wide_all_indiv_eur_egfr_70)

In [None]:
# Keep the rows of `df_ntile_norm_wide` whose IID also occurs in
# `df_ntile_norm_eur_egfr_50`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_eur_egfr_50 <- df_ntile_norm_wide[
  df_ntile_norm_wide$IID %in% df_ntile_norm_eur_egfr_50$IID,
]
nrow(df_ntile_norm_wide_all_indiv_eur_egfr_50)

#### afr egfr

In [None]:
# Keep the rows of `df_ntile_norm_wide` whose IID also occurs in
# `df_ntile_norm_afr_egfr_80`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_afr_egfr_80 <- df_ntile_norm_wide[
  df_ntile_norm_wide$IID %in% df_ntile_norm_afr_egfr_80$IID,
]
nrow(df_ntile_norm_wide_all_indiv_afr_egfr_80)

In [None]:
# Keep the rows of `df_ntile_norm_wide` whose IID also occurs in
# `df_ntile_norm_afr_egfr_70`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_afr_egfr_70 <- df_ntile_norm_wide[
  df_ntile_norm_wide$IID %in% df_ntile_norm_afr_egfr_70$IID,
]
nrow(df_ntile_norm_wide_all_indiv_afr_egfr_70)

In [None]:
# Keep the rows of `df_ntile_norm_wide` whose IID also occurs in
# `df_ntile_norm_afr_egfr_50`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_afr_egfr_50 <- df_ntile_norm_wide[
  df_ntile_norm_wide$IID %in% df_ntile_norm_afr_egfr_50$IID,
]
nrow(df_ntile_norm_wide_all_indiv_afr_egfr_50)

### all individuals multiancestry scores

#### eur egfr

In [None]:
# Keep the rows of `df_ntile_norm_wide_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_eur_egfr_95`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_95 <- df_ntile_norm_wide_multiancestry_score[
  df_ntile_norm_wide_multiancestry_score$IID %in% df_ntile_norm_eur_egfr_95$IID,
]
nrow(df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_95)

In [None]:
# Keep the rows of `df_ntile_norm_wide_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_eur_egfr_90`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_90 <- df_ntile_norm_wide_multiancestry_score[
  df_ntile_norm_wide_multiancestry_score$IID %in% df_ntile_norm_eur_egfr_90$IID,
]
nrow(df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_90)

In [None]:
# Keep the rows of `df_ntile_norm_wide_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_eur_egfr_80`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_80 <- df_ntile_norm_wide_multiancestry_score[
  df_ntile_norm_wide_multiancestry_score$IID %in% df_ntile_norm_eur_egfr_80$IID,
]
nrow(df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_80)

In [None]:
# Keep the rows of `df_ntile_norm_wide_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_eur_egfr_70`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_70 <- df_ntile_norm_wide_multiancestry_score[
  df_ntile_norm_wide_multiancestry_score$IID %in% df_ntile_norm_eur_egfr_70$IID,
]
nrow(df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_70)

In [None]:
# Keep the rows of `df_ntile_norm_wide_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_eur_egfr_50`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_50 <- df_ntile_norm_wide_multiancestry_score[
  df_ntile_norm_wide_multiancestry_score$IID %in% df_ntile_norm_eur_egfr_50$IID,
]
nrow(df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_50)

#### afr egfr

In [None]:
# Keep the rows of `df_ntile_norm_wide_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_afr_egfr_80`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_multiancestry_score_afr_egfr_80 <- df_ntile_norm_wide_multiancestry_score[
  df_ntile_norm_wide_multiancestry_score$IID %in% df_ntile_norm_afr_egfr_80$IID,
]
nrow(df_ntile_norm_wide_all_indiv_multiancestry_score_afr_egfr_80)

In [None]:
# Keep the rows of `df_ntile_norm_wide_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_afr_egfr_70`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_multiancestry_score_afr_egfr_70 <- df_ntile_norm_wide_multiancestry_score[
  df_ntile_norm_wide_multiancestry_score$IID %in% df_ntile_norm_afr_egfr_70$IID,
]
nrow(df_ntile_norm_wide_all_indiv_multiancestry_score_afr_egfr_70)

In [None]:
# Keep the rows of `df_ntile_norm_wide_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_afr_egfr_50`, then echo the remaining row count.
df_ntile_norm_wide_all_indiv_multiancestry_score_afr_egfr_50 <- df_ntile_norm_wide_multiancestry_score[
  df_ntile_norm_wide_multiancestry_score$IID %in% df_ntile_norm_afr_egfr_50$IID,
]
nrow(df_ntile_norm_wide_all_indiv_multiancestry_score_afr_egfr_50)

### eur

#### eur egfr

In [None]:
# Keep the rows of `df_ntile_norm_wide_eur` whose IID also occurs in
# `df_ntile_norm_eur_egfr_95`, then echo the remaining row count.
df_ntile_norm_wide_eur_indiv_eur_egfr_95 <- df_ntile_norm_wide_eur[
  df_ntile_norm_wide_eur$IID %in% df_ntile_norm_eur_egfr_95$IID,
]
nrow(df_ntile_norm_wide_eur_indiv_eur_egfr_95)

In [None]:
# Keep the rows of `df_ntile_norm_wide_eur` whose IID also occurs in
# `df_ntile_norm_eur_egfr_90`, then echo the remaining row count.
df_ntile_norm_wide_eur_indiv_eur_egfr_90 <- df_ntile_norm_wide_eur[
  df_ntile_norm_wide_eur$IID %in% df_ntile_norm_eur_egfr_90$IID,
]
nrow(df_ntile_norm_wide_eur_indiv_eur_egfr_90)

In [None]:
# Keep the rows of `df_ntile_norm_wide_eur` whose IID also occurs in
# `df_ntile_norm_eur_egfr_80`, then echo the remaining row count.
df_ntile_norm_wide_eur_indiv_eur_egfr_80 <- df_ntile_norm_wide_eur[
  df_ntile_norm_wide_eur$IID %in% df_ntile_norm_eur_egfr_80$IID,
]
nrow(df_ntile_norm_wide_eur_indiv_eur_egfr_80)

In [None]:
# Keep the rows of `df_ntile_norm_wide_eur` whose IID also occurs in
# `df_ntile_norm_eur_egfr_70`, then echo the remaining row count.
df_ntile_norm_wide_eur_indiv_eur_egfr_70 <- df_ntile_norm_wide_eur[
  df_ntile_norm_wide_eur$IID %in% df_ntile_norm_eur_egfr_70$IID,
]
nrow(df_ntile_norm_wide_eur_indiv_eur_egfr_70)

In [None]:
# Keep the rows of `df_ntile_norm_wide_eur` whose IID also occurs in
# `df_ntile_norm_eur_egfr_50`, then echo the remaining row count.
df_ntile_norm_wide_eur_indiv_eur_egfr_50 <- df_ntile_norm_wide_eur[
  df_ntile_norm_wide_eur$IID %in% df_ntile_norm_eur_egfr_50$IID,
]
nrow(df_ntile_norm_wide_eur_indiv_eur_egfr_50)

### eur individuals multiancestry scores

#### eur egfr

In [None]:
# Keep the rows of `df_ntile_norm_wide_eur_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_eur_egfr_95`, then echo the remaining row count.
df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_95 <- df_ntile_norm_wide_eur_multiancestry_score[
  df_ntile_norm_wide_eur_multiancestry_score$IID %in% df_ntile_norm_eur_egfr_95$IID,
]
nrow(df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_95)

In [None]:
# Keep the rows of `df_ntile_norm_wide_eur_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_eur_egfr_90`, then echo the remaining row count.
df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_90 <- df_ntile_norm_wide_eur_multiancestry_score[
  df_ntile_norm_wide_eur_multiancestry_score$IID %in% df_ntile_norm_eur_egfr_90$IID,
]
nrow(df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_90)

In [None]:
# Keep the rows of `df_ntile_norm_wide_eur_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_eur_egfr_80`, then echo the remaining row count.
df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_80 <- df_ntile_norm_wide_eur_multiancestry_score[
  df_ntile_norm_wide_eur_multiancestry_score$IID %in% df_ntile_norm_eur_egfr_80$IID,
]
nrow(df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_80)

In [None]:
# Keep the rows of `df_ntile_norm_wide_eur_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_eur_egfr_70`, then echo the remaining row count.
df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_70 <- df_ntile_norm_wide_eur_multiancestry_score[
  df_ntile_norm_wide_eur_multiancestry_score$IID %in% df_ntile_norm_eur_egfr_70$IID,
]
nrow(df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_70)

In [None]:
# Keep the rows of `df_ntile_norm_wide_eur_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_eur_egfr_50`, then echo the remaining row count.
df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_50 <- df_ntile_norm_wide_eur_multiancestry_score[
  df_ntile_norm_wide_eur_multiancestry_score$IID %in% df_ntile_norm_eur_egfr_50$IID,
]
nrow(df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_50)

### afr

#### afr egfr

In [None]:
# Keep the rows of `df_ntile_norm_wide_afr` whose IID also occurs in
# `df_ntile_norm_afr_egfr_80`, then echo the remaining row count.
# The table is built from the AFR percentile table, so it is stored under an
# `afr_indiv` name (matching the `afr_indiv_multiancestry_score` cells).
df_ntile_norm_wide_afr_indiv_afr_egfr_80 <- df_ntile_norm_wide_afr[
  df_ntile_norm_wide_afr$IID %in% df_ntile_norm_afr_egfr_80$IID,
]
# Legacy, misnamed (`eur_indiv`) alias kept so existing references keep working.
df_ntile_norm_wide_eur_indiv_afr_egfr_80 <- df_ntile_norm_wide_afr_indiv_afr_egfr_80
nrow(df_ntile_norm_wide_afr_indiv_afr_egfr_80)

In [None]:
# Keep the rows of `df_ntile_norm_wide_afr` whose IID also occurs in
# `df_ntile_norm_afr_egfr_70`, then echo the remaining row count.
# The table is built from the AFR percentile table, so it is stored under an
# `afr_indiv` name (matching the `afr_indiv_multiancestry_score` cells).
df_ntile_norm_wide_afr_indiv_afr_egfr_70 <- df_ntile_norm_wide_afr[
  df_ntile_norm_wide_afr$IID %in% df_ntile_norm_afr_egfr_70$IID,
]
# Legacy, misnamed (`eur_indiv`) alias kept so existing references keep working.
df_ntile_norm_wide_eur_indiv_afr_egfr_70 <- df_ntile_norm_wide_afr_indiv_afr_egfr_70
nrow(df_ntile_norm_wide_afr_indiv_afr_egfr_70)

In [None]:
# Keep the rows of `df_ntile_norm_wide_afr` whose IID also occurs in
# `df_ntile_norm_afr_egfr_50`, then echo the remaining row count.
# The table is built from the AFR percentile table, so it is stored under an
# `afr_indiv` name (matching the `afr_indiv_multiancestry_score` cells).
df_ntile_norm_wide_afr_indiv_afr_egfr_50 <- df_ntile_norm_wide_afr[
  df_ntile_norm_wide_afr$IID %in% df_ntile_norm_afr_egfr_50$IID,
]
# Legacy, misnamed (`eur_indiv`) alias kept so existing references keep working.
df_ntile_norm_wide_eur_indiv_afr_egfr_50 <- df_ntile_norm_wide_afr_indiv_afr_egfr_50
nrow(df_ntile_norm_wide_afr_indiv_afr_egfr_50)

### afr individuals multiancestry scores

#### afr egfr

In [None]:
# Keep the rows of `df_ntile_norm_wide_afr_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_afr_egfr_80`, then echo the remaining row count.
df_ntile_norm_wide_afr_indiv_multiancestry_score_afr_egfr_80 <- df_ntile_norm_wide_afr_multiancestry_score[
  df_ntile_norm_wide_afr_multiancestry_score$IID %in% df_ntile_norm_afr_egfr_80$IID,
]
nrow(df_ntile_norm_wide_afr_indiv_multiancestry_score_afr_egfr_80)

In [None]:
# Keep the rows of `df_ntile_norm_wide_afr_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_afr_egfr_70`, then echo the remaining row count.
df_ntile_norm_wide_afr_indiv_multiancestry_score_afr_egfr_70 <- df_ntile_norm_wide_afr_multiancestry_score[
  df_ntile_norm_wide_afr_multiancestry_score$IID %in% df_ntile_norm_afr_egfr_70$IID,
]
nrow(df_ntile_norm_wide_afr_indiv_multiancestry_score_afr_egfr_70)

In [None]:
# Keep the rows of `df_ntile_norm_wide_afr_multiancestry_score` whose IID also
# occurs in `df_ntile_norm_afr_egfr_50`, then echo the remaining row count.
df_ntile_norm_wide_afr_indiv_multiancestry_score_afr_egfr_50 <- df_ntile_norm_wide_afr_multiancestry_score[
  df_ntile_norm_wide_afr_multiancestry_score$IID %in% df_ntile_norm_afr_egfr_50$IID,
]
nrow(df_ntile_norm_wide_afr_indiv_multiancestry_score_afr_egfr_50)

## create function

In [None]:
# List the models whose rows all satisfy a criterion.
#
# Arguments:
#   df        data frame with a `model` column and a column named by `criteria`.
#   criteria  name (string) of the column to test; a model qualifies only when
#             every one of its rows has `criteria == 1`.
#   ntile     if TRUE, return the names prefixed with "ntile_" instead of the
#             raw `model` values.
#
# Returns the unique qualifying `model` values in order of first appearance
# (they are not sorted), or those values prefixed with "ntile_". With
# `ntile = TRUE` and no qualifying model the result is `character(0)`.
#
# Stops if `criteria` is not a column of `df`.
equiv_scores <- function(df, criteria, ntile = FALSE) {
    # Ensure criteria is a column in df
    if (!criteria %in% names(df)) {
        stop(paste0("Criteria column ", criteria, " not found in dataframe"))
    }

    # Keep only the groups (models) in which every row meets the criterion
    models <- df %>%
        group_by(model) %>%
        filter(all(.data[[criteria]] == 1)) %>%
        ungroup() %>%
        pull(model) %>%
        unique()

    if (!ntile) {
        return(models)
    }

    # paste0() treats a zero-length argument as "", which would fabricate the
    # bogus name "ntile_" when no model qualifies; return an empty vector instead.
    if (length(models) == 0) {
        return(character(0))
    }

    paste0("ntile_", models)
}

In [None]:
# Intraclass correlation across the percentile columns of the models that
# satisfy one criterion.
#
# Arguments:
#   df_ntile_norm     table holding one "ntile_<model>" column per model.
#   model_metrics_df  metrics table passed to equiv_scores() to pick the models.
#   criteria          name (string) of the criterion column in model_metrics_df.
#
# Returns a one-row tibble with the ICC value (`icc`), its bounds (`ubound`,
# `lbound`), the number of raters (`Raters`) and the criterion (`Criteria`).
#
# Stops with an explicit message when no percentile column is selected or a
# selected column is absent from `df_ntile_norm`.
ICC_equiv <- function(df_ntile_norm, model_metrics_df, criteria) {
    ntile_list <- equiv_scores(model_metrics_df, criteria, ntile = TRUE)

    # Fail early and clearly instead of surfacing a column-lookup error about a
    # name such as "ntile_" from deeper in the pipeline.
    missing_cols <- setdiff(ntile_list, names(df_ntile_norm))
    if (length(ntile_list) == 0 || length(missing_cols) > 0) {
        stop(paste0("No usable percentile columns for criteria ", criteria,
                    "; selected: [", paste(ntile_list, collapse = ", "),
                    "], missing: [", paste(missing_cols, collapse = ", "), "]"))
    }

    # Two-way agreement ICC, treating each selected column as one rater
    icc_fit <- df_ntile_norm %>%
        select(all_of(ntile_list)) %>%
        icc(., model = "twoway", type = "agreement")

    tibble(
        icc = icc_fit$value,
        ubound = icc_fit$ubound,
        lbound = icc_fit$lbound,
        Raters = icc_fit$raters,
        Criteria = criteria)
}

## all individuals

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide`, stack the rows,
# preview the table, and write it as a TSV.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide, model_metrics_df, criterion)
  })
)

head(ICC_res)
write.table(
  ICC_res,
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

## all individuals with multi-ancestry scores

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_multiancestry_score`,
# stack the rows, preview the table, and write it as a TSV.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_multiancestry_score <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_multiancestry_score, model_metrics_df_multiancestry_score, criterion)
  })
)

head(ICC_res_multiancestry_score)
write.table(
  ICC_res_multiancestry_score,
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### eur egfr

#### 95%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_95`, stack the
# rows, preview the table, and write it as a TSV.
# NOTE(review): `df_ntile_norm_wide_95` is not built by the subsetting cells in
# this section (they create `df_ntile_norm_wide_all_indiv_eur_egfr_95`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_95 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_95, model_metrics_df, criterion)
  })
)

head(ICC_res_95)
write.table(
  ICC_res_95,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.95_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### 90%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_90`, stack the
# rows, preview the table, and write it as a TSV.
# NOTE(review): `df_ntile_norm_wide_90` is not built by the subsetting cells in
# this section (they create `df_ntile_norm_wide_all_indiv_eur_egfr_90`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_90 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_90, model_metrics_df, criterion)
  })
)

head(ICC_res_90)
write.table(
  ICC_res_90,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.90_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### 80%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_80`, stack the
# rows, preview the table, and write it as a TSV.
# NOTE(review): `df_ntile_norm_wide_80` is not built by the subsetting cells in
# this section (they create `df_ntile_norm_wide_all_indiv_eur_egfr_80`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_80 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_80, model_metrics_df, criterion)
  })
)

head(ICC_res_80)
write.table(
  ICC_res_80,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.80_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### 70%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_70`, stack the
# rows, preview the table, and write it as a TSV.
# NOTE(review): `df_ntile_norm_wide_70` is not built by the subsetting cells in
# this section (they create `df_ntile_norm_wide_all_indiv_eur_egfr_70`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_70 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_70, model_metrics_df, criterion)
  })
)

head(ICC_res_70)
write.table(
  ICC_res_70,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.70_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### 50%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_50`, stack the
# rows, preview the table, and write it as a TSV.
# NOTE(review): `df_ntile_norm_wide_50` is not built by the subsetting cells in
# this section (they create `df_ntile_norm_wide_all_indiv_eur_egfr_50`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_50 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_50, model_metrics_df, criterion)
  })
)

head(ICC_res_50)
write.table(
  ICC_res_50,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.50_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### afr egfr

#### 80%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_all_indiv_afr_egfr_80`,
# stack the rows, preview the table, and write it as a TSV.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_all_indiv_afr_egfr_80 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_all_indiv_afr_egfr_80, model_metrics_df, criterion)
  })
)

head(ICC_res_all_indiv_afr_egfr_80)
write.table(
  ICC_res_all_indiv_afr_egfr_80,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.AFR_eGFR.flip.80_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### 70%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_all_indiv_afr_egfr_70`,
# stack the rows, preview the table, and write it as a TSV.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_all_indiv_afr_egfr_70 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_all_indiv_afr_egfr_70, model_metrics_df, criterion)
  })
)

head(ICC_res_all_indiv_afr_egfr_70)
write.table(
  ICC_res_all_indiv_afr_egfr_70,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.AFR_eGFR.flip.70_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### 50%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_all_indiv_afr_egfr_50`,
# stack the rows, preview the table, and write it as a TSV.
# The input is the 50% subset: this cell previously passed the 80% subset
# (`..._afr_egfr_80`), so the "50_conf" file duplicated the 80% results.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_all_indiv_afr_egfr_50 <- lapply(metrics, function(m) {
  ICC_equiv(df_ntile_norm_wide_all_indiv_afr_egfr_50, model_metrics_df, m)
}) %>%
  bind_rows(.)

head(ICC_res_all_indiv_afr_egfr_50)
write.table(ICC_res_all_indiv_afr_egfr_50,
            'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.AFR_eGFR.flip.50_conf.individual_agreement.ICC.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

### eur egfr

#### 95%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_multiancestry_score_95`,
# stack the rows, preview the table, and write it as a TSV.
# NOTE(review): this input is not built by the subsetting cells in this section
# (they create `df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_95`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_multiancestry_score_95 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_multiancestry_score_95, model_metrics_df_multiancestry_score, criterion)
  })
)

head(ICC_res_multiancestry_score_95)
write.table(
  ICC_res_multiancestry_score_95,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.95_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### 90%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_multiancestry_score_90`,
# stack the rows, preview the table, and write it as a TSV.
# NOTE(review): this input is not built by the subsetting cells in this section
# (they create `df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_90`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_multiancestry_score_90 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_multiancestry_score_90, model_metrics_df_multiancestry_score, criterion)
  })
)

head(ICC_res_multiancestry_score_90)
write.table(
  ICC_res_multiancestry_score_90,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.90_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### 80%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_multiancestry_score_80`,
# stack the rows, preview the table, and write it as a TSV.
# NOTE(review): this input is not built by the subsetting cells in this section
# (they create `df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_80`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_multiancestry_score_80 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_multiancestry_score_80, model_metrics_df_multiancestry_score, criterion)
  })
)

head(ICC_res_multiancestry_score_80)
write.table(
  ICC_res_multiancestry_score_80,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.80_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### 70%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_multiancestry_score_70`,
# stack the rows, preview the table, and write it as a TSV.
# NOTE(review): this input is not built by the subsetting cells in this section
# (they create `df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_70`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_multiancestry_score_70 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_multiancestry_score_70, model_metrics_df_multiancestry_score, criterion)
  })
)

head(ICC_res_multiancestry_score_70)
write.table(
  ICC_res_multiancestry_score_70,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.70_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### 50%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_multiancestry_score_50`,
# stack the rows, preview the table, and write it as a TSV.
# NOTE(review): this input is not built by the subsetting cells in this section
# (they create `df_ntile_norm_wide_all_indiv_multiancestry_score_eur_egfr_50`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_multiancestry_score_50 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_multiancestry_score_50, model_metrics_df_multiancestry_score, criterion)
  })
)

head(ICC_res_multiancestry_score_50)
write.table(
  ICC_res_multiancestry_score_50,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.50_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### afr egfr

#### 80%

In [None]:
# Run ICC_equiv() once per criterion on
# `df_ntile_norm_wide_all_indiv_multiancestry_score_afr_egfr_80`, stack the
# rows, preview the table, and write it as a TSV.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_all_indiv_multiancestry_score_afr_egfr_80 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_all_indiv_multiancestry_score_afr_egfr_80, model_metrics_df_multiancestry_score, criterion)
  })
)

head(ICC_res_all_indiv_multiancestry_score_afr_egfr_80)
write.table(
  ICC_res_all_indiv_multiancestry_score_afr_egfr_80,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.AFR_eGFR_flip.80_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### 70%

In [None]:
# Run ICC_equiv() once per criterion on
# `df_ntile_norm_wide_all_indiv_multiancestry_score_afr_egfr_70`, stack the
# rows, preview the table, and write it as a TSV.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_all_indiv_multiancestry_score_afr_egfr_70 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_all_indiv_multiancestry_score_afr_egfr_70, model_metrics_df_multiancestry_score, criterion)
  })
)

head(ICC_res_all_indiv_multiancestry_score_afr_egfr_70)
write.table(
  ICC_res_all_indiv_multiancestry_score_afr_egfr_70,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.AFR_eGFR_flip.70_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

#### 50%

In [None]:
# Run ICC_equiv() once per criterion on
# `df_ntile_norm_wide_all_indiv_multiancestry_score_afr_egfr_50`, stack the
# rows, preview the table, and write it as a TSV.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_all_indiv_multiancestry_score_afr_egfr_50 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_all_indiv_multiancestry_score_afr_egfr_50, model_metrics_df_multiancestry_score, criterion)
  })
)

head(ICC_res_all_indiv_multiancestry_score_afr_egfr_50)
write.table(
  ICC_res_all_indiv_multiancestry_score_afr_egfr_50,
  file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.AFR_eGFR_flip.50_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

## eur

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_eur`, stack the
# rows, preview the table, and write it as a TSV.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_eur <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_eur, model_metrics_df_eur, criterion)
  })
)

head(ICC_res_eur)
write.table(
  ICC_res_eur,
  file = 'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

## eur individuals with multi ancestry scores

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_eur_multiancestry_score`,
# stack the rows, preview the table, and write it as a TSV.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_eur_multiancestry_score <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_eur_multiancestry_score, model_metrics_df_eur_multiancestry_score, criterion)
  })
)

head(ICC_res_eur_multiancestry_score)
write.table(
  ICC_res_eur_multiancestry_score,
  file = 'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### 95%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_eur_95`, stack the
# rows, preview the table, and write it as a TSV.
# NOTE(review): `df_ntile_norm_wide_eur_95` is not built by the subsetting cells
# in this section (they create `df_ntile_norm_wide_eur_indiv_eur_egfr_95`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_eur_95 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_eur_95, model_metrics_df_eur, criterion)
  })
)

head(ICC_res_eur_95)
write.table(
  ICC_res_eur_95,
  file = 'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.95_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### 90%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_eur_90`, stack the
# rows, preview the table, and write it as a TSV.
# NOTE(review): `df_ntile_norm_wide_eur_90` is not built by the subsetting cells
# in this section (they create `df_ntile_norm_wide_eur_indiv_eur_egfr_90`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_eur_90 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_eur_90, model_metrics_df_eur, criterion)
  })
)

head(ICC_res_eur_90)
write.table(
  ICC_res_eur_90,
  file = 'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.90_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### 80%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_eur_80`, stack the
# rows, preview the table, and write it as a TSV.
# NOTE(review): `df_ntile_norm_wide_eur_80` is not built by the subsetting cells
# in this section (they create `df_ntile_norm_wide_eur_indiv_eur_egfr_80`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_eur_80 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_eur_80, model_metrics_df_eur, criterion)
  })
)

head(ICC_res_eur_80)
write.table(
  ICC_res_eur_80,
  file = 'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.80_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### 70%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_eur_70`, stack the
# rows, preview the table, and write it as a TSV.
# NOTE(review): `df_ntile_norm_wide_eur_70` is not built by the subsetting cells
# in this section (they create `df_ntile_norm_wide_eur_indiv_eur_egfr_70`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_eur_70 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_eur_70, model_metrics_df_eur, criterion)
  })
)

head(ICC_res_eur_70)
write.table(
  ICC_res_eur_70,
  file = 'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.70_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### 50%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_eur_50`, stack the
# rows, preview the table, and write it as a TSV.
# NOTE(review): `df_ntile_norm_wide_eur_50` is not built by the subsetting cells
# in this section (they create `df_ntile_norm_wide_eur_indiv_eur_egfr_50`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_eur_50 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_eur_50, model_metrics_df_eur, criterion)
  })
)

head(ICC_res_eur_50)
write.table(
  ICC_res_eur_50,
  file = 'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.50_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### 95%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_eur_multiancestry_score_95`,
# stack the rows, preview the table, and write it as a TSV.
# NOTE(review): this input is not built by the subsetting cells in this section
# (they create `df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_95`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_eur_multiancestry_score_95 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_eur_multiancestry_score_95, model_metrics_df_eur_multiancestry_score, criterion)
  })
)

head(ICC_res_eur_multiancestry_score_95)
write.table(
  ICC_res_eur_multiancestry_score_95,
  file = 'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.multiancestry_scores.95_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### 90%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_eur_multiancestry_score_90`,
# stack the rows, preview the table, and write it as a TSV.
# NOTE(review): this input is not built by the subsetting cells in this section
# (they create `df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_90`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_eur_multiancestry_score_90 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_eur_multiancestry_score_90, model_metrics_df_eur_multiancestry_score, criterion)
  })
)

head(ICC_res_eur_multiancestry_score_90)
write.table(
  ICC_res_eur_multiancestry_score_90,
  file = 'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.multiancestry_scores.90_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### 80%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_eur_multiancestry_score_80`,
# stack the rows, preview the table, and write it as a TSV.
# NOTE(review): this input is not built by the subsetting cells in this section
# (they create `df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_80`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_eur_multiancestry_score_80 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_eur_multiancestry_score_80, model_metrics_df_eur_multiancestry_score, criterion)
  })
)

head(ICC_res_eur_multiancestry_score_80)
write.table(
  ICC_res_eur_multiancestry_score_80,
  file = 'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.multiancestry_scores.80_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### 70%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_eur_multiancestry_score_70`,
# stack the rows, preview the table, and write it as a TSV.
# NOTE(review): this input is not built by the subsetting cells in this section
# (they create `df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_70`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_eur_multiancestry_score_70 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_eur_multiancestry_score_70, model_metrics_df_eur_multiancestry_score, criterion)
  })
)

head(ICC_res_eur_multiancestry_score_70)
write.table(
  ICC_res_eur_multiancestry_score_70,
  file = 'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.multiancestry_scores.70_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### 50%

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_eur_multiancestry_score_50`,
# stack the rows, preview the table, and write it as a TSV.
# NOTE(review): this input is not built by the subsetting cells in this section
# (they create `df_ntile_norm_wide_eur_indiv_multiancestry_score_eur_egfr_50`) —
# confirm it is defined upstream.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_eur_multiancestry_score_50 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_eur_multiancestry_score_50, model_metrics_df_eur_multiancestry_score, criterion)
  })
)

head(ICC_res_eur_multiancestry_score_50)
write.table(
  ICC_res_eur_multiancestry_score_50,
  file = 'output/performance_metrics/PMBB_v2.EUR.CKD.PGS_Znorm2.multiancestry_scores.50_conf.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

## afr

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_afr`, stack the
# rows, preview the table, and write it as a TSV.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_afr <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_afr, model_metrics_df_afr, criterion)
  })
)

head(ICC_res_afr)
write.table(
  ICC_res_afr,
  file = 'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

## afr individuals with multi ancestry scores

In [None]:
# Run ICC_equiv() once per criterion on `df_ntile_norm_wide_afr_multiancestry_score`,
# stack the rows, preview the table, and write it as a TSV.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_afr_multiancestry_score <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_afr_multiancestry_score, model_metrics_df_afr_multiancestry_score, criterion)
  })
)

head(ICC_res_afr_multiancestry_score)
write.table(
  ICC_res_afr_multiancestry_score,
  file = 'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt',
  sep = '\t',
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### 80%

In [None]:
# Run ICC_equiv (defined earlier in the notebook) once per equivalence
# criterion and stack the per-criterion results into one data frame.
# NOTE(review): the input table is named "..._all_indiv_..." while the model
# metrics, result name and output file are all AFR - confirm this is the
# intended data frame.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_afr_indiv_afr_egfr_80 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_all_indiv_afr_egfr_80,
              model_metrics_df_afr,
              criterion)
  })
)

head(ICC_res_afr_indiv_afr_egfr_80)
# Tab-separated output with a header row, no row names and no quoting.
write.table(ICC_res_afr_indiv_afr_egfr_80,
            file = 'output/performance_metrics/PMBB_v2.AFR.CKD.PGS_Znorm2.AFR_eGFR_flip.80_conf.individual_agreement.ICC.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

### 70%

In [None]:
# Run ICC_equiv (defined earlier in the notebook) once per equivalence
# criterion and stack the per-criterion results into one data frame.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_afr_indiv_afr_egfr_70 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_all_indiv_afr_egfr_70,
              model_metrics_df_afr,
              criterion)
  })
)

head(ICC_res_afr_indiv_afr_egfr_70)
# Tab-separated output with a header row, no row names and no quoting.
write.table(ICC_res_afr_indiv_afr_egfr_70,
            file = 'output/performance_metrics/PMBB_v2.AFR.CKD.PGS_Znorm2.AFR_eGFR_flip.70_conf.individual_agreement.ICC.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

### 50%

In [None]:
# Run ICC_equiv (defined earlier in the notebook) once per equivalence
# criterion and stack the per-criterion results into one data frame.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_afr_indiv_afr_egfr_50 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_all_indiv_afr_egfr_50,
              model_metrics_df_afr,
              criterion)
  })
)

head(ICC_res_afr_indiv_afr_egfr_50)
# Tab-separated output with a header row, no row names and no quoting.
write.table(ICC_res_afr_indiv_afr_egfr_50,
            file = 'output/performance_metrics/PMBB_v2.AFR.CKD.PGS_Znorm2.AFR_eGFR_flip.50_conf.individual_agreement.ICC.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

### 80%

In [None]:
# Run ICC_equiv (defined earlier in the notebook) once per equivalence
# criterion and stack the per-criterion results into one data frame.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_afr_indiv_multiancestry_score_afr_egfr_80 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_afr_indiv_multiancestry_score_afr_egfr_80,
              model_metrics_df_afr_multiancestry_score,
              criterion)
  })
)

head(ICC_res_afr_indiv_multiancestry_score_afr_egfr_80)
# Tab-separated output with a header row, no row names and no quoting.
write.table(ICC_res_afr_indiv_multiancestry_score_afr_egfr_80,
            file = 'output/performance_metrics/PMBB_v2.AFR.CKD.PGS_Znorm2.multiancestry_scores.AFR_eGFR_flip.80_conf.individual_agreement.ICC.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

### 70%

In [None]:
# Run ICC_equiv (defined earlier in the notebook) once per equivalence
# criterion and stack the per-criterion results into one data frame.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_afr_indiv_multiancestry_score_afr_egfr_70 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_afr_indiv_multiancestry_score_afr_egfr_70,
              model_metrics_df_afr_multiancestry_score,
              criterion)
  })
)

head(ICC_res_afr_indiv_multiancestry_score_afr_egfr_70)
# Tab-separated output with a header row, no row names and no quoting.
write.table(ICC_res_afr_indiv_multiancestry_score_afr_egfr_70,
            file = 'output/performance_metrics/PMBB_v2.AFR.CKD.PGS_Znorm2.multiancestry_scores.AFR_eGFR_flip.70_conf.individual_agreement.ICC.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

### 50%

In [None]:
# Run ICC_equiv (defined earlier in the notebook) once per equivalence
# criterion and stack the per-criterion results into one data frame.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
ICC_res_afr_indiv_multiancestry_score_afr_egfr_50 <- bind_rows(
  lapply(metrics, function(criterion) {
    ICC_equiv(df_ntile_norm_wide_afr_indiv_multiancestry_score_afr_egfr_50,
              model_metrics_df_afr_multiancestry_score,
              criterion)
  })
)

head(ICC_res_afr_indiv_multiancestry_score_afr_egfr_50)
# Tab-separated output with a header row, no row names and no quoting.
write.table(ICC_res_afr_indiv_multiancestry_score_afr_egfr_50,
            file = 'output/performance_metrics/PMBB_v2.AFR.CKD.PGS_Znorm2.multiancestry_scores.AFR_eGFR_flip.50_conf.individual_agreement.ICC.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

## make line graph - PMBB only

### read in input files (if needed)

#### all

In [None]:
# Reload the all-individuals / all-scores ICC table. Uses the PMBB v3
# YK_Phenotyping file name that the rest of this notebook reads for the same
# table, rather than the older PMBB_v2 name.
ICC_res <- read.csv('output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt',
                    sep = '\t')

#### all individuals multiancestry scores

In [None]:
# Reload the all-individuals / multi-ancestry-scores ICC table. Uses the PMBB
# v3 YK_Phenotyping file name that the rest of this notebook reads for the
# same table, rather than the older PMBB_v2 name.
ICC_res_multiancestry_score <- read.csv('output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt',
                                        sep = '\t')

#### eur

In [None]:
# Reload the EUR / all-scores ICC table. Uses the PMBB v3 YK_Phenotyping file
# name that the rest of this notebook reads for the same table, rather than
# the older PMBB_v2 name.
ICC_res_eur <- read.csv('output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt',
                        sep = '\t')

#### eur individuals multiancestry scores

In [None]:
# Reload the EUR / multi-ancestry-scores ICC table. Uses the PMBB v3
# YK_Phenotyping file name that the rest of this notebook reads for the same
# table, rather than the older PMBB_v2 name.
ICC_res_eur_multiancestry_score <- read.csv('output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt',
                                            sep = '\t')

#### afr

In [None]:
# Reload the AFR / all-scores ICC table from the file this notebook writes
# for ICC_res_afr (PMBB v3 YK_Phenotyping name), rather than the older
# PMBB_v2 name.
ICC_res_afr <- read.csv('output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt',
                        sep = '\t')

#### afr individuals multiancestry scores

In [None]:
# Reload the AFR / multi-ancestry-scores ICC table from the file this notebook
# writes for ICC_res_afr_multiancestry_score (PMBB v3 YK_Phenotyping name),
# rather than the older PMBB_v2 name.
ICC_res_afr_multiancestry_score <- read.csv('output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt',
                                            sep = '\t')

### merge

In [None]:
# Wide table of ICCs: one row per criterion, one column per
# (cohort, score set) combination. Each join brings in the next result table;
# its ubound/lbound/Raters columns are dropped and its icc column renamed
# before the following join, so the incoming icc never collides.
icc_merge <- ICC_res %>%
  select(Criteria, icc) %>%
  rename(ALL_indiv.all_scores = icc) %>%
  left_join(ICC_res_multiancestry_score, by = "Criteria") %>%
  select(-ubound, -lbound, -Raters) %>%
  rename(ALL_indiv.eur_multiancestry_scores = icc) %>%
  left_join(ICC_res_eur, by = "Criteria") %>%
  select(-ubound, -lbound, -Raters) %>%
  rename(EUR_indiv.all_scores = icc) %>%
  left_join(ICC_res_eur_multiancestry_score, by = "Criteria") %>%
  select(-ubound, -lbound, -Raters) %>%
  rename(EUR_indiv.eur_multiancestry_scores = icc) %>%
  left_join(ICC_res_afr, by = "Criteria") %>%
  select(-ubound, -lbound, -Raters) %>%
  rename(AFR_indiv.all_scores = icc) %>%
  left_join(ICC_res_afr_multiancestry_score, by = "Criteria") %>%
  select(-ubound, -lbound, -Raters) %>%
  rename(AFR_indiv.eur_multiancestry_scores = icc) %>%
  # Relabel the criteria for display; the substitutions run in this order.
  mutate(
    Criteria = gsub('prob_dif', 'Statistically_Equivalent', Criteria),
    Criteria = gsub('ROPE_005', 'ROPE_0.005', Criteria),
    Criteria = gsub('ROPE_01', 'ROPE_0.01', Criteria),
    Criteria = gsub('ROPE_02', 'ROPE_0.02', Criteria)
  )
icc_merge

### convert to long form

In [None]:
# Long form: one row per (criterion, score column). Criteria becomes a factor
# with an explicit level order, which fixes the x-axis order in the plot.
criteria_order <- c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
icc_merge_long <- icc_merge %>%
  pivot_longer(
    cols = ends_with("_scores"),
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  mutate(Criteria = factor(Criteria, levels = criteria_order))
icc_merge_long

### make plot

In [None]:
# Line graph of ICC against equivalence criterion, one line per score column.
icc_plot <- ggplot(
  icc_merge_long,
  aes(x = Criteria, y = score_value, color = score_type, group = score_type)
) +
  geom_point() +
  geom_line() +
  labs(
    title = "PMBB v3 CKD PGS ICC",
    x = "Equivalence Criteria",
    y = "ICC",
    color = "Score"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )

icc_plot
# Save the plot built above; passing it explicitly avoids relying on
# ggplot2's "last plot" state.
ggsave(
  filename = 'output/plots/PMBB_v3.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.line_graph.png',
  plot = icc_plot,
  width = 10,
  height = 6,
  dpi = 300,
  bg = "white"
)

## make bar plot - AOU + PMBB combined

### read in input files

In [None]:
# PMBB ICC tables (tab-separated), one per cohort x score-set combination.
ICC_res_pmbb <- read.csv(
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt',
  sep = '\t'
)
ICC_res_multiancestry_score_pmbb <- read.csv(
  file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt',
  sep = '\t'
)
ICC_res_eur_pmbb <- read.csv(
  file = 'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt',
  sep = '\t'
)
ICC_res_eur_multiancestry_score_pmbb <- read.csv(
  file = 'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt',
  sep = '\t'
)
ICC_res_afr_pmbb <- read.csv(
  file = 'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt',
  sep = '\t'
)
ICC_res_afr_multiancestry_score_pmbb <- read.csv(
  file = 'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt',
  sep = '\t'
)

In [None]:
# AOU ICC tables (tab-separated), one per cohort x score-set combination.
ICC_res_aou <- read.csv(
  file = 'AOU/AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt',
  sep = '\t'
)
ICC_res_multiancestry_score_aou <- read.csv(
  file = 'AOU/AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt',
  sep = '\t'
)
ICC_res_eur_aou <- read.csv(
  file = 'AOU/AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt',
  sep = '\t'
)
ICC_res_eur_multiancestry_score_aou <- read.csv(
  file = 'AOU/AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt',
  sep = '\t'
)
ICC_res_afr_aou <- read.csv(
  file = 'AOU/AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt',
  sep = '\t'
)
ICC_res_afr_multiancestry_score_aou <- read.csv(
  file = 'AOU/AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt',
  sep = '\t'
)

### create combined inputs

In [None]:
# PMBB ICCs under the ROPE_02 criterion as one wide table. Each cbind attaches
# the next result table, the filter that follows keeps only its ROPE_02 row,
# and the select keeps the columns built so far plus the new icc, which is
# then renamed. Relies on cbind recycling the accumulated row against the
# incoming table - presumably each input has exactly one ROPE_02 row.
icc_merge_pmbb <- ICC_res_pmbb %>%
  filter(Criteria == 'ROPE_02') %>%
  select(icc) %>%
  rename(ALL_indiv.all_scores = icc) %>%
  cbind(ICC_res_multiancestry_score_pmbb) %>%
  filter(Criteria == 'ROPE_02') %>%
  select(ALL_indiv.all_scores, icc) %>%
  rename(ALL_indiv.eur_multiancestry_scores = icc) %>%
  cbind(ICC_res_eur_pmbb) %>%
  filter(Criteria == 'ROPE_02') %>%
  select(ALL_indiv.all_scores, ALL_indiv.eur_multiancestry_scores, icc) %>%
  rename(EUR_indiv.all_scores = icc) %>%
  cbind(ICC_res_eur_multiancestry_score_pmbb) %>%
  filter(Criteria == 'ROPE_02') %>%
  select(ALL_indiv.all_scores, ALL_indiv.eur_multiancestry_scores,
         EUR_indiv.all_scores, icc) %>%
  rename(EUR_indiv.eur_multiancestry_scores = icc) %>%
  cbind(ICC_res_afr_pmbb) %>%
  filter(Criteria == 'ROPE_02') %>%
  select(ALL_indiv.all_scores, ALL_indiv.eur_multiancestry_scores,
         EUR_indiv.all_scores, EUR_indiv.eur_multiancestry_scores, icc) %>%
  rename(AFR_indiv.all_scores = icc) %>%
  cbind(ICC_res_afr_multiancestry_score_pmbb) %>%
  filter(Criteria == 'ROPE_02') %>%
  select(ALL_indiv.all_scores, ALL_indiv.eur_multiancestry_scores,
         EUR_indiv.all_scores, EUR_indiv.eur_multiancestry_scores,
         AFR_indiv.all_scores, icc) %>%
  rename(AFR_indiv.eur_multiancestry_scores = icc) %>%
  mutate(Dataset = 'PMBB')
icc_merge_pmbb

In [None]:
# AOU ICCs under the ROPE_02 criterion as one wide table. Each cbind attaches
# the next result table, the filter that follows keeps only its ROPE_02 row,
# and the select keeps the columns built so far plus the new icc, which is
# then renamed. Relies on cbind recycling the accumulated row against the
# incoming table - presumably each input has exactly one ROPE_02 row.
icc_merge_aou <- ICC_res_aou %>%
  filter(Criteria == 'ROPE_02') %>%
  select(icc) %>%
  rename(ALL_indiv.all_scores = icc) %>%
  cbind(ICC_res_multiancestry_score_aou) %>%
  filter(Criteria == 'ROPE_02') %>%
  select(ALL_indiv.all_scores, icc) %>%
  rename(ALL_indiv.eur_multiancestry_scores = icc) %>%
  cbind(ICC_res_eur_aou) %>%
  filter(Criteria == 'ROPE_02') %>%
  select(ALL_indiv.all_scores, ALL_indiv.eur_multiancestry_scores, icc) %>%
  rename(EUR_indiv.all_scores = icc) %>%
  cbind(ICC_res_eur_multiancestry_score_aou) %>%
  filter(Criteria == 'ROPE_02') %>%
  select(ALL_indiv.all_scores, ALL_indiv.eur_multiancestry_scores,
         EUR_indiv.all_scores, icc) %>%
  rename(EUR_indiv.eur_multiancestry_scores = icc) %>%
  cbind(ICC_res_afr_aou) %>%
  filter(Criteria == 'ROPE_02') %>%
  select(ALL_indiv.all_scores, ALL_indiv.eur_multiancestry_scores,
         EUR_indiv.all_scores, EUR_indiv.eur_multiancestry_scores, icc) %>%
  rename(AFR_indiv.all_scores = icc) %>%
  cbind(ICC_res_afr_multiancestry_score_aou) %>%
  filter(Criteria == 'ROPE_02') %>%
  select(ALL_indiv.all_scores, ALL_indiv.eur_multiancestry_scores,
         EUR_indiv.all_scores, EUR_indiv.eur_multiancestry_scores,
         AFR_indiv.all_scores, icc) %>%
  rename(AFR_indiv.eur_multiancestry_scores = icc) %>%
  mutate(Dataset = 'AOU')
icc_merge_aou

In [None]:
# Stack the PMBB and AOU tables; the Dataset column tells them apart.
icc_merge_all <- rbind(icc_merge_pmbb, icc_merge_aou)
icc_merge_all

### convert to long form

In [None]:
# Long form for plotting: one row per (Dataset, score column).
# Note: this overwrites the icc_merge_long built for the line graph.
icc_merge_long <- pivot_longer(
  icc_merge_all,
  cols = ends_with("_scores"),
  names_to = "score_type",
  values_to = "score_value"
)
icc_merge_long

### make bar plot

In [None]:
# Dodged bar plot of ICC per score column, one bar per dataset.
# The x-axis title, labels and ticks are blanked by the theme() call, so the
# bars are not labelled by score column in the saved image.
icc_bar_plot <- ggplot(
  icc_merge_long,
  aes(x = score_type, y = score_value, fill = Dataset)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "AOU and PMBB ICC",
    x = "Score",
    y = "ICC",
    fill = "Dataset"
  ) +
  scale_fill_manual(values = c("#990000", "#011F5B")) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = 'right'
  ) +
  ylim(0, 1)

icc_bar_plot
# Save the plot built above; passing it explicitly avoids relying on
# ggplot2's "last plot" state.
ggsave(
  filename = 'output/plots/AOU.PMBB_v3.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ROPE_0.02.ICC.bar_plot.png',
  plot = icc_bar_plot,
  width = 10,
  height = 6,
  dpi = 300,
  bg = "white"
)

# compute light's kappa

## make equiv scores function (if needed)

In [None]:
# Names of the models that satisfy an equivalence criterion.
#
# df:       data frame with a `model` column and a column named by `criteria`.
# criteria: name of the criterion column to test.
# ntile:    if TRUE, return the names prefixed with "ntile_".
#
# A model qualifies only when every one of its rows has criteria == 1.
# Returns the qualifying model names in order of first appearance (they are
# not sorted). Stops if `criteria` is not a column of df.
equiv_scores <- function(df, criteria, ntile = FALSE) {
  if (!criteria %in% names(df)) {
    stop(paste0("Criteria column ", criteria, " not found in dataframe"))
  }

  # Keep whole groups: filter() sees one logical per model via all().
  qualifying_rows <- df %>%
    group_by(model) %>%
    filter(all(.data[[criteria]] == 1)) %>%
    ungroup()
  model_names <- unique(pull(qualifying_rows, model))

  if (!ntile) {
    return(model_names)
  }
  paste0("ntile_", model_names)
}

## write functions

In [None]:
# Light's kappa for m raters: the mean of the unweighted Cohen's kappas
# (kappa2) over every pair of rater columns, with a z statistic and p-value.
#
# Appears to be a local re-definition of the irr package's kappam.light,
# written so the count arithmetic is done in doubles instead of integers.
# This version additionally:
#   * keeps all working state in local vectors (the previous exists("dis") /
#     exists("disrater") checks also searched the global environment, so a
#     global of either name silently changed the result);
#   * evaluates B / ns^(2 * choose(nr, 2)) in log space, so the statistic and
#     p-value do not become NaN when both terms overflow to Inf;
#   * stops with a clear message when fewer than two raters are supplied;
#   * returns (with a NaN/zero-variance result) instead of failing with
#     "object 'dis' not found" when only one rating level is present.
#
# ratings: subjects in rows, raters in columns; rows containing NA are dropped.
# Returns an object of class "irrlist" with elements method, subjects, raters,
# irr.name, value, stat.name, statistic and p.value.
kappam.light <- function(ratings) {
    ratings <- as.matrix(na.omit(ratings))

    ns <- nrow(ratings)
    nr <- ncol(ratings)
    if (nr < 2) {
        stop("kappam.light needs at least two rater columns")
    }

    # Columns of rater_pairs are the pairs (1,2), (1,3), ..., (nr-1,nr).
    rater_pairs <- combn(nr, 2)
    n_pairs <- ncol(rater_pairs)

    kappas <- vapply(seq_len(n_pairs), function(p) {
        kappa2(ratings[, rater_pairs[, p]], weight = "u")$value
    }, numeric(1))
    value <- mean(kappas)

    # Variance & computation of p-value
    lev <- levels(as.factor(ratings))

    # level_counts[l, k]: number of subjects rater k assigned to level l,
    # stored as doubles so the products below cannot overflow integer range.
    level_counts <- matrix(
        vapply(seq_len(nr), function(k) {
            vapply(lev, function(l) as.numeric(sum(ratings[, k] == l)),
                   numeric(1), USE.NAMES = FALSE)
        }, numeric(length(lev))),
        nrow = length(lev), ncol = nr
    )

    # For each rater pair: sum over all level pairs (i != j) of
    # count_1[i] * count_2[j], i.e. the full product minus the diagonal.
    disrater <- vapply(seq_len(n_pairs), function(p) {
        c1 <- level_counts[, rater_pairs[1, p]]
        c2 <- level_counts[, rater_pairs[2, p]]
        sum(c1) * sum(c2) - sum(c1 * c2)
    }, numeric(1))

    # B = length(disrater) * prod(disrater), divided by ns^(2 * choose(nr, 2)),
    # evaluated as exp(log B - log denominator) to avoid Inf / Inf.
    log_B <- log(length(disrater)) + sum(log(disrater))
    log_denominator <- choose(nr, 2) * 2 * log(ns)
    chanceP <- 1 - exp(log_B - log_denominator)
    varkappa <- chanceP / (ns * (1 - chanceP))

    SEkappa <- sqrt(varkappa)
    u <- value / SEkappa
    p.value <- 2 * (1 - pnorm(abs(u)))

    structure(list(method = "Light's Kappa for m Raters",
                   subjects = ns, raters = nr,
                   irr.name = "Kappa", value = value,
                   stat.name = "z", statistic = u, p.value = p.value),
              class = "irrlist")
}

In [None]:
# Long table of n-tile values with one 0/1 flag column per cut-off.
#
# df:         wide data frame with an IID column and the columns in ntile_list.
# ntile_list: names of the n-tile columns to keep.
# percentile: vector of cut-offs; each adds a column "ntile_<cutoff>" that is
#             1 where ntile >= cutoff and 0 otherwise.
#
# Returns one row per (IID, score) with columns IID, score, ntile and the
# flag columns.
create_binary_df <- function(df, ntile_list, percentile) {
    long_ntiles <- df %>%
        select(IID, all_of(ntile_list)) %>%
        pivot_longer(cols = starts_with("ntile"),
                     names_to = "score",
                     values_to = "ntile")

    for (cutoff in percentile) {
        flag_col <- paste0("ntile_", cutoff)
        long_ntiles <- mutate(long_ntiles,
                              !!flag_col := ifelse(ntile >= cutoff, 1.0, 0.0))
    }

    long_ntiles
}

In [None]:
# Light's kappa across scores for a single percentile cut-off.
#
# df_ntile_top: long table from create_binary_df (IID, score, ntile_<p> flags).
# percentile:   cut-off whose flag column "ntile_<percentile>" is used.
# ntile_list:   score columns to include as raters.
# metric_name:  label copied into the result.
#
# Returns a data frame with columns metric, percentile and kappa_value.
calculate_and_kappa <- function(df_ntile_top, percentile, ntile_list, metric_name) {
    flag_col <- paste0("ntile_", percentile)

    # One row per IID, one column per score, holding that score's flag.
    flags_wide <- df_ntile_top %>%
        dplyr::select(score, all_of(flag_col), IID) %>%
        pivot_wider(names_from = score, values_from = all_of(flag_col)) %>%
        dplyr::select(all_of(ntile_list))

    light_kappa <- kappam.light(flags_wide)

    data.frame(metric = metric_name,
               percentile = percentile,
               kappa_value = as.numeric(light_kappa$value))
}

## compute

#### all

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results <- do.call(rbind, kappa_results)
combined_kappa_results
write.table(combined_kappa_results,
            file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

#### all individuals multiancestry scores

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df_multiancestry_score, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_multiancestry_score, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results_multiancestry_score <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_multiancestry_score <- do.call(rbind, kappa_results_multiancestry_score)
combined_kappa_results_multiancestry_score
write.table(combined_kappa_results_multiancestry_score,
            file = 'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

##### 95%

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_95, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_95 <- do.call(rbind, kappa_results)
combined_kappa_results_95
write.table(combined_kappa_results_95,
            file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.95_conf.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

##### 90%

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_90, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_90 <- do.call(rbind, kappa_results)
combined_kappa_results_90
write.table(combined_kappa_results_90,
            file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.90_conf.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

##### 80%

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_80, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_80 <- do.call(rbind, kappa_results)
combined_kappa_results_80
write.table(combined_kappa_results_80,
            file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.80_conf.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

##### 70%

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_70, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_70 <- do.call(rbind, kappa_results)
combined_kappa_results_70
write.table(combined_kappa_results_70,
            file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.70_conf.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

##### 50%

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_50, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_50 <- do.call(rbind, kappa_results)
combined_kappa_results_50
write.table(combined_kappa_results_50,
            file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.50_conf.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

##### 95%

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df_multiancestry_score, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_multiancestry_score_95, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results_multiancestry_score <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_multiancestry_score_95 <- do.call(rbind, kappa_results_multiancestry_score)
combined_kappa_results_multiancestry_score_95
write.table(combined_kappa_results_multiancestry_score_95,
            file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.95_conf.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

##### 90%

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df_multiancestry_score, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_multiancestry_score_90, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results_multiancestry_score <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_multiancestry_score_90 <- do.call(rbind, kappa_results_multiancestry_score)
combined_kappa_results_multiancestry_score_90
write.table(combined_kappa_results_multiancestry_score_90,
            file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.90_conf.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

##### 80%

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df_multiancestry_score, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_multiancestry_score_80, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results_multiancestry_score <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_multiancestry_score_80 <- do.call(rbind, kappa_results_multiancestry_score)
combined_kappa_results_multiancestry_score_80
write.table(combined_kappa_results_multiancestry_score_80,
            file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.80_conf.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

##### 70%

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df_multiancestry_score, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_multiancestry_score_70, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results_multiancestry_score <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_multiancestry_score_70 <- do.call(rbind, kappa_results_multiancestry_score)
combined_kappa_results_multiancestry_score_70
write.table(combined_kappa_results_multiancestry_score_70,
            file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.70_conf.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

##### 50%

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df_multiancestry_score, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_multiancestry_score_50, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results_multiancestry_score <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_multiancestry_score_50 <- do.call(rbind, kappa_results_multiancestry_score)
combined_kappa_results_multiancestry_score_50
write.table(combined_kappa_results_multiancestry_score_50,
            file = 'output/performance_metrics/PMBB_v2.ALL.CKD.PGS_Znorm2.multiancestry_scores.50_conf.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

#### eur

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df_eur, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_eur, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results_eur <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_eur <- do.call(rbind, kappa_results_eur)
combined_kappa_results_eur
write.table(combined_kappa_results_eur,
            file = 'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

#### eur individuals multiancestry scores

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df_eur_multiancestry_score, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_eur_multiancestry_score, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results_eur_multiancestry_score <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_eur_multiancestry_score <- do.call(rbind, kappa_results_eur_multiancestry_score)
combined_kappa_results_eur_multiancestry_score
write.table(combined_kappa_results_eur_multiancestry_score,
            file = 'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

#### afr

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df_afr, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_afr, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

# NOTE(review): this cell omits "ROPE_005", unlike the AFR multi-ancestry
# cell - confirm that is intentional.
metrics <- c("prob_dif", "ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results_afr <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_afr <- do.call(rbind, kappa_results_afr)
combined_kappa_results_afr
write.table(combined_kappa_results_afr,
            file = 'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

#### afr individuals multiancestry scores

In [None]:
# Light's kappa at every percentile cut-off for the scores that satisfy
# `metric`. Reads the global `percentiles` defined below.
process_list <- function(metric) {
    equivalent_cols <- equiv_scores(model_metrics_df_afr_multiancestry_score, metric, ntile = TRUE)
    binary_long <- create_binary_df(df_ntile_norm_wide_afr_multiancestry_score, equivalent_cols, percentiles)

    per_percentile <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(binary_long, cutoff, equivalent_cols, metric)
    })

    do.call(rbind, per_percentile)
}

metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
kappa_results_afr_multiancestry_score <- lapply(metrics, process_list)

# One row per (metric, percentile) pair.
combined_kappa_results_afr_multiancestry_score <- do.call(rbind, kappa_results_afr_multiancestry_score)
combined_kappa_results_afr_multiancestry_score
write.table(combined_kappa_results_afr_multiancestry_score,
            file = 'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

## make line graph (PMBB only)

### read in input files (if needed)

#### all

In [None]:
# Reload the ALL-individuals / all-scores kappa table. The path follows the
# PMBB_v3 / YK_Phenotyping naming used for these results elsewhere in this
# notebook (it previously pointed at a PMBB_v2 file, although the plot built
# from it is titled and saved as PMBB v3).
combined_kappa_results <- read.csv('output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt',
                                   sep = '\t')

#### all individuals multiancestry scores

In [None]:
# Reload the ALL-individuals / multi-ancestry-scores kappa table. The path
# follows the PMBB_v3 / YK_Phenotyping naming used for these results elsewhere
# in this notebook (it previously pointed at a PMBB_v2 file).
combined_kappa_results_multiancestry_score <- read.csv('output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
                                                       sep = '\t')

#### eur

In [None]:
# Reload the EUR-individuals / all-scores kappa table. The path follows the
# PMBB_v3 / YK_Phenotyping naming used for these results elsewhere in this
# notebook (it previously pointed at a PMBB_v2 file).
combined_kappa_results_eur <- read.csv('output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt',
                                       sep = '\t')

#### eur individuals multiancestry scores

In [None]:
# Reload the EUR-individuals / multi-ancestry-scores kappa table. The path is
# the PMBB_v3 / YK_Phenotyping file this notebook writes for that result (it
# previously pointed at a PMBB_v2 file).
combined_kappa_results_eur_multiancestry_score <- read.csv('output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
                                                           sep = '\t')

#### afr

In [None]:
# Reload the AFR-individuals / all-scores kappa table. The path is the
# PMBB_v3 / YK_Phenotyping file this notebook writes for that result (it
# previously pointed at a PMBB_v2 file).
combined_kappa_results_afr <- read.csv('output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt',
                                       sep = '\t')

#### afr individuals multiancestry scores

In [None]:
# Reload the AFR-individuals / multi-ancestry-scores kappa table. The path is
# the PMBB_v3 / YK_Phenotyping file this notebook writes for that result (it
# previously pointed at a PMBB_v2 file).
combined_kappa_results_afr_multiancestry_score <- read.csv('output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
                                                           sep = '\t')

### merge

In [None]:
# Build one wide kappa table keyed by (metric, percentile): each input table's
# kappa_value column is renamed to its cohort / score-set label, the tables are
# full-joined left to right in the order listed, and the metric codes are then
# rewritten to their display labels.
kappa_merge <- Map(
  function(kappa_tbl, label) rename(kappa_tbl, !!label := 'kappa_value'),
  list(
    combined_kappa_results_afr_multiancestry_score,
    combined_kappa_results_afr,
    combined_kappa_results_eur_multiancestry_score,
    combined_kappa_results_eur,
    combined_kappa_results_multiancestry_score,
    combined_kappa_results
  ),
  c(
    'AFR_indiv.eur_multiancestry_scores',
    'AFR_indiv.all_scores',
    'EUR_indiv.eur_multiancestry_scores',
    'EUR_indiv.all_scores',
    'ALL_indiv.eur_multiancestry_scores',
    'ALL_indiv.all_scores'
  )
) %>%
  Reduce(f = function(joined, nxt) {
    full_join(joined, nxt, by = c('metric', 'percentile'))
  }) %>%
  # Later assignments inside one mutate() see the earlier ones, so the
  # substitutions are still applied in sequence.
  mutate(
    metric = gsub('prob_dif', 'Statistically_Equivalent', metric),
    metric = gsub('ROPE_005', 'ROPE_0.005', metric),
    metric = gsub('ROPE_01', 'ROPE_0.01', metric),
    metric = gsub('ROPE_02', 'ROPE_0.02', metric)
  )
kappa_merge

### convert to long form

In [None]:
# Stack every "*_scores" column into (score_type, score_value) pairs and fix
# the facet order of the metric labels.
kappa_merge_long <- mutate(
  pivot_longer(
    kappa_merge,
    cols = ends_with("_scores"),
    names_to = "score_type",
    values_to = "score_value"
  ),
  metric = factor(
    metric,
    levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
  )
)
head(kappa_merge_long)

### make plot

In [None]:
# Line graph of Light's kappa against percentile cut-off, one line per
# cohort / score set and one panel per equivalence metric.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line width in favour of
# `linewidth`; switch once the installed version is confirmed.
kappa_plot <- ggplot(kappa_merge_long,
                     aes(x = percentile, y = score_value, color = score_type)) +
  geom_point() +
  geom_line(size = 1) +
  facet_wrap(~ metric, nrow = 1) +  # Optional: separate panel for each metric
  labs(title = "PMBB v3 CKD PGS Light's Kappa",
       x = "Percentile",
       y = "Light's Kappa",
       color = "Score") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

kappa_plot
# Pass the plot explicitly so the saved file cannot pick up a different
# "last plot" if cells are run out of order.
ggsave('output/plots/PMBB_v3.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.line_graph.png',
       plot = kappa_plot,
       width = 12,
       height = 3,
       dpi = 300,
       bg = "white")

## make combined bar plot with AOU and PMBB

### read in input files

In [None]:
# PMBB v3 kappa tables (tab-separated), one per cohort (ALL / EUR / AFR) and
# score set (all scores / multi-ancestry scores).
combined_kappa_results_pmbb <- read.delim(
  'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt'
)
combined_kappa_results_multiancestry_score_pmbb <- read.delim(
  'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt'
)
combined_kappa_results_eur_pmbb <- read.delim(
  'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt'
)
combined_kappa_results_eur_multiancestry_score_pmbb <- read.delim(
  'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt'
)
combined_kappa_results_afr_pmbb <- read.delim(
  'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt'
)
combined_kappa_results_afr_multiancestry_score_pmbb <- read.delim(
  'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt'
)

In [None]:
# AOU kappa tables (tab-separated), one per cohort (ALL / EUR / AFR) and
# score set (all scores / multi-ancestry scores).
combined_kappa_results_aou <- read.delim(
  'AOU/AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt'
)
combined_kappa_results_multiancestry_score_aou <- read.delim(
  'AOU/AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt'
)
combined_kappa_results_eur_aou <- read.delim(
  'AOU/AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt'
)
combined_kappa_results_eur_multiancestry_score_aou <- read.delim(
  'AOU/AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt'
)
combined_kappa_results_afr_aou <- read.delim(
  'AOU/AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt'
)
combined_kappa_results_afr_multiancestry_score_aou <- read.delim(
  'AOU/AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt'
)

### create combined inputs

In [None]:
# Return the single kappa_value recorded for metric "ROPE_02" at percentile 98
# in one kappa results table.
#   kappa_df: data frame with columns metric, percentile and kappa_value.
# Stops with an error unless exactly one row matches, instead of relying on
# cbind() row recycling to line a one-row frame up against a full table.
kappa_at_rope02_p98 <- function(kappa_df) {
  hit <- which(kappa_df[["metric"]] == 'ROPE_02' & kappa_df[["percentile"]] == 98)
  if (length(hit) != 1L) {
    stop("expected exactly one ROPE_02 / percentile 98 row, found ", length(hit),
         call. = FALSE)
  }
  kappa_df[["kappa_value"]][hit]
}

# One-row PMBB summary: one kappa column per cohort / score set, followed by
# the Dataset label. Column order is the order the bar plot input expects.
kappa_merge_pmbb <- data.frame(
  ALL_indiv.all_scores = kappa_at_rope02_p98(combined_kappa_results_pmbb),
  ALL_indiv.eur_multiancestry_scores = kappa_at_rope02_p98(combined_kappa_results_multiancestry_score_pmbb),
  EUR_indiv.all_scores = kappa_at_rope02_p98(combined_kappa_results_eur_pmbb),
  EUR_indiv.eur_multiancestry_scores = kappa_at_rope02_p98(combined_kappa_results_eur_multiancestry_score_pmbb),
  AFR_indiv.all_scores = kappa_at_rope02_p98(combined_kappa_results_afr_pmbb),
  AFR_indiv.eur_multiancestry_scores = kappa_at_rope02_p98(combined_kappa_results_afr_multiancestry_score_pmbb),
  Dataset = 'PMBB',
  stringsAsFactors = FALSE
)
kappa_merge_pmbb

In [None]:
# Return the single kappa_value recorded for metric "ROPE_02" at percentile 98
# in one kappa results table.
#   kappa_df: data frame with columns metric, percentile and kappa_value.
# Stops with an error unless exactly one row matches, instead of relying on
# cbind() row recycling to line a one-row frame up against a full table.
kappa_at_rope02_p98 <- function(kappa_df) {
  hit <- which(kappa_df[["metric"]] == 'ROPE_02' & kappa_df[["percentile"]] == 98)
  if (length(hit) != 1L) {
    stop("expected exactly one ROPE_02 / percentile 98 row, found ", length(hit),
         call. = FALSE)
  }
  kappa_df[["kappa_value"]][hit]
}

# One-row AOU summary: one kappa column per cohort / score set, followed by
# the Dataset label. Column order is the order the bar plot input expects.
kappa_merge_aou <- data.frame(
  ALL_indiv.all_scores = kappa_at_rope02_p98(combined_kappa_results_aou),
  ALL_indiv.eur_multiancestry_scores = kappa_at_rope02_p98(combined_kappa_results_multiancestry_score_aou),
  EUR_indiv.all_scores = kappa_at_rope02_p98(combined_kappa_results_eur_aou),
  EUR_indiv.eur_multiancestry_scores = kappa_at_rope02_p98(combined_kappa_results_eur_multiancestry_score_aou),
  AFR_indiv.all_scores = kappa_at_rope02_p98(combined_kappa_results_afr_aou),
  AFR_indiv.eur_multiancestry_scores = kappa_at_rope02_p98(combined_kappa_results_afr_multiancestry_score_aou),
  Dataset = 'AOU',
  stringsAsFactors = FALSE
)
kappa_merge_aou

In [None]:
# Stack the PMBB row on top of the AOU row.
kappa_merge_all <- do.call(rbind, list(kappa_merge_pmbb, kappa_merge_aou))
kappa_merge_all

### convert to long form

In [None]:
# Stack every "*_scores" column into (score_type, score_value) pairs; the
# Dataset column is carried along unchanged.
kappa_merge_long <- pivot_longer(
  kappa_merge_all,
  cols = ends_with("_scores"),
  names_to = "score_type",
  values_to = "score_value"
)
kappa_merge_long

### make plot

In [None]:
# Dodged bar chart of Light's kappa per cohort / score set, AOU next to PMBB.
kappa_bar_plot <- ggplot(kappa_merge_long,
                         aes(x = score_type, y = score_value, fill = Dataset)) +
  geom_col(position = "dodge") +
  labs(title = "AOU and PMBB Light's Kappa",
       x = "Score Group",
       y = "Light's Kappa",
       fill = "Dataset") +
  # Colours are named so each dataset keeps its colour regardless of level
  # order (this is the same pairing the positional vector produced).
  scale_fill_manual(values = c(AOU = "#990000", PMBB = "#011F5B")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = 'none',
        axis.title.x = element_text(margin = margin(t = 15, r = 0, b = 0, l = 0))) +
  ylim(0, 1)

kappa_bar_plot
# Pass the plot explicitly so the saved file cannot pick up a different
# "last plot" if cells are run out of order.
ggsave('output/plots/AOU.PMBB_v3.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ROPE_0.02.98th_percentile.LightsKappa.bar_plot.png',
       plot = kappa_bar_plot,
       width = 10,
       height = 6,
       dpi = 300,
       bg = "white")

### make combined ICC/light K plots

In [None]:
# Stack the ICC bar plot above the kappa bar plot. The grid is stored so the
# exact object shown is the one written to disk, rather than whatever
# last_plot() happens to return.
icc_kappa_grid <- plot_grid(icc_bar_plot, kappa_bar_plot,
                            ncol = 1, align = "v", axis = "tb",
                            rel_heights = c(1, 1.8))
icc_kappa_grid
# NOTE(review): no width/height is given, so ggsave() falls back to the size of
# the current graphics device -- consider fixing the dimensions.
ggsave('output/plots/AOU.PMBB_v3.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ROPE_0.02.ICC.98th_percentile.LightsKappa.bar_plot.png',
       plot = icc_kappa_grid,
       dpi = 300,
       bg = "white")

# pairwise correlations between score percentile distributions

## read in input files (if needed)

### all

In [None]:
# Wide-form individual percentile table for the ALL cohort (tab-separated).
df_ntile_norm_wide <- read.delim(
  'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt'
)

In [None]:
# Per-model performance metrics for the ALL cohort (tab-separated).
model_metrics_df <- read.delim(
  'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)

### eur

In [None]:
# Wide-form individual percentile table for the EUR cohort (tab-separated).
df_ntile_norm_wide_eur <- read.delim(
  'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt'
)

In [None]:
# Per-model performance metrics for the EUR cohort (tab-separated).
model_metrics_df_eur <- read.delim(
  'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)

### afr

In [None]:
# Wide-form individual percentile table for the AFR cohort (tab-separated).
df_ntile_norm_wide_afr <- read.delim(
  'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt'
)

In [None]:
# Per-model performance metrics for the AFR cohort (tab-separated).
model_metrics_df_afr <- read.delim(
  'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)

## create PGS list

In [None]:
# Distinct model IDs present in the ALL-cohort metrics table, and their count.
pgs_include <- unique(model_metrics_df[["model"]])
length(pgs_include)

In [None]:
# Distinct model IDs present in the EUR-cohort metrics table, and their count.
pgs_include_eur <- unique(model_metrics_df_eur[["model"]])
length(pgs_include_eur)

In [None]:
# Distinct model IDs present in the AFR-cohort metrics table, and their count.
pgs_include_afr <- unique(model_metrics_df_afr[["model"]])
length(pgs_include_afr)

## calculate correlations

### all

In [None]:
# calculate R for all pairs
# Keep only the percentile columns and strip the leading "ntile_" so column
# names are the bare score IDs. The pattern is anchored: an unanchored gsub()
# would also delete "ntile_" inside a name (e.g. "ntile_quantile_x").
all_correlations <- df_ntile_norm_wide %>%
  select(starts_with("ntile_")) %>%
  rename_with(~ sub("^ntile_", "", .x), .cols = everything())

# Correlate one score's percentile column against every score column
# (cor() default method).
#   pgs_list: a single score ID, i.e. one column name of `ntile_df`.
#   ntile_df: percentile table to use; defaults to the ALL-cohort table.
# Returns a long table with columns model (the other score), r, and PGS_ID
# (the score passed in).
calculate_correlations <- function(pgs_list, ntile_df = all_correlations) {
  correlation_coefficients <- cor(ntile_df[, pgs_list], ntile_df)
  # The 1 x p result is turned into one row per score. No row.names are
  # supplied: the previous p-length vector was silently ignored for a 1-row frame.
  correlation_df <- as.data.frame(correlation_coefficients) %>%
    pivot_longer(cols = everything(), names_to = "model", values_to = "r")

  # Record which score the correlations were computed against
  correlation_df$PGS_ID <- pgs_list
  correlation_df
}

# Apply the function to each PGS ID and combine results into a single data frame
all_correlations_df <- do.call(rbind, lapply(pgs_include, calculate_correlations)) %>%
  rename(model_1 = model, model_2 = PGS_ID)
head(all_correlations_df)
write.table(all_correlations_df,
            'output/performance_metrics/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.score_level_percentile.pairwise_correlations.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)
length(unique(all_correlations_df$model_1))
length(unique(all_correlations_df$model_2))

### eur

In [None]:
# calculate R for all pairs
# Keep only the percentile columns and strip the leading "ntile_" so column
# names are the bare score IDs. The pattern is anchored: an unanchored gsub()
# would also delete "ntile_" inside a name (e.g. "ntile_quantile_x").
all_correlations_eur <- df_ntile_norm_wide_eur %>%
  select(starts_with("ntile_")) %>%
  rename_with(~ sub("^ntile_", "", .x), .cols = everything())

# Correlate one score's percentile column against every score column
# (cor() default method).
#   pgs_list: a single score ID, i.e. one column name of `ntile_df`.
#   ntile_df: percentile table to use; defaults to the EUR-cohort table.
# Returns a long table with columns model (the other score), r, and PGS_ID
# (the score passed in).
calculate_correlations <- function(pgs_list, ntile_df = all_correlations_eur) {
  correlation_coefficients <- cor(ntile_df[, pgs_list], ntile_df)
  # The 1 x p result is turned into one row per score. No row.names are
  # supplied: the previous p-length vector was silently ignored for a 1-row frame.
  correlation_df <- as.data.frame(correlation_coefficients) %>%
    pivot_longer(cols = everything(), names_to = "model", values_to = "r")

  # Record which score the correlations were computed against
  correlation_df$PGS_ID <- pgs_list
  correlation_df
}

# Apply the function to each PGS ID and combine results into a single data frame
all_correlations_df_eur <- do.call(rbind, lapply(pgs_include_eur, calculate_correlations)) %>%
  rename(model_1 = model, model_2 = PGS_ID)
head(all_correlations_df_eur)
write.table(all_correlations_df_eur,
            'output/performance_metrics/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.score_level_percentile.pairwise_correlations.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)
length(unique(all_correlations_df_eur$model_1))
length(unique(all_correlations_df_eur$model_2))

### afr

In [None]:
# calculate R for all pairs
# Keep only the percentile columns and strip the leading "ntile_" so column
# names are the bare score IDs. The pattern is anchored: an unanchored gsub()
# would also delete "ntile_" inside a name (e.g. "ntile_quantile_x").
all_correlations_afr <- df_ntile_norm_wide_afr %>%
  select(starts_with("ntile_")) %>%
  rename_with(~ sub("^ntile_", "", .x), .cols = everything())

# Correlate one score's percentile column against every score column
# (cor() default method).
#   pgs_list: a single score ID, i.e. one column name of `ntile_df`.
#   ntile_df: percentile table to use; defaults to the AFR-cohort table.
# Returns a long table with columns model (the other score), r, and PGS_ID
# (the score passed in).
calculate_correlations <- function(pgs_list, ntile_df = all_correlations_afr) {
  correlation_coefficients <- cor(ntile_df[, pgs_list], ntile_df)
  # The 1 x p result is turned into one row per score. No row.names are
  # supplied: the previous p-length vector was silently ignored for a 1-row frame.
  correlation_df <- as.data.frame(correlation_coefficients) %>%
    pivot_longer(cols = everything(), names_to = "model", values_to = "r")

  # Record which score the correlations were computed against
  correlation_df$PGS_ID <- pgs_list
  correlation_df
}

# Apply the function to each PGS ID and combine results into a single data frame
all_correlations_df_afr <- do.call(rbind, lapply(pgs_include_afr, calculate_correlations)) %>%
  rename(model_1 = model, model_2 = PGS_ID)
head(all_correlations_df_afr)
write.table(all_correlations_df_afr,
            'output/performance_metrics/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.score_level_percentile.pairwise_correlations.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)
length(unique(all_correlations_df_afr$model_1))
length(unique(all_correlations_df_afr$model_2))

## make PGS cohort order

In [None]:
# Hand-listed PRScsx score IDs: the Phe_585.3 scores first, then the
# eGFR.flip scores.
meta_phe <- c(
  c('AFR.Phe_585.3.PRScsx', 'EAS.Phe_585.3.PRScsx', 'EUR.Phe_585.3.PRScsx'),
  c('AFR.eGFR.flip.PRScsx', 'AMR.eGFR.flip.PRScsx',
    'EAS.eGFR.flip.PRScsx', 'EUR.eGFR.flip.PRScsx')
)

In [None]:
# Display order for scores: CKDGen EUR-only list, CKDGen multi-ancestry list,
# UKBB list, then the hand-listed PRScsx scores.
PGS_cohort_order <- c(
  ckdgen_eur[["V1"]],
  ckdgen_multi[["V1"]],
  ukbb[["V1"]],
  meta_phe
)
length(PGS_cohort_order)
PGS_cohort_order

## make heatmap

### all

In [None]:
# heat map
# Pairwise percentile correlations for the ALL cohort, drawn as one triangle
# (diagonal included) with scores ordered by PGS_cohort_order.
corr_heatmap <- all_correlations_df %>%
  # Keep pairs where model_1 does not come after model_2 in the cohort order;
  # scores absent from PGS_cohort_order give NA here and are dropped by filter().
  filter(as.numeric(factor(model_1, levels = PGS_cohort_order)) <=
           as.numeric(factor(model_2, levels = PGS_cohort_order))) %>%
  arrange(factor(model_2, levels = PGS_cohort_order)) %>%
  arrange(factor(model_1, levels = PGS_cohort_order)) %>%
  # fct_inorder() turns the row order established above into the axis order
  ggplot(aes(x = fct_inorder(model_2), y = fct_inorder(model_1), fill = r)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "skyblue", mid = "white", high = "orange",
                       limits = c(-1, 1), name = "*r*") +
  # No shape aesthetic is mapped in this plot, so this scale is currently inert.
  scale_shape_manual(values = c(8), na.translate = FALSE, name = NULL) +
  guides(fill = guide_colourbar(order = 1)) +
  labs(x = NULL,
       y = NULL,
       title = "PMBB v3 ALL CKD PGS Percentile Correlation") +
  coord_equal() +
  theme_bw(base_size = 16) +
  theme(panel.grid.major = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1),
        legend.title = ggtext::element_markdown(),
        plot.title.position = "plot")

corr_heatmap
# Pass the plot explicitly so the saved file cannot pick up a different
# "last plot" if cells are run out of order.
ggsave('output/plots/PMBB_v3.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_correlation.heatmap.png',
       plot = corr_heatmap,
       width = 10,
       height = 8,
       dpi = 300)

### eur

In [None]:
# heat map
# Pairwise percentile correlations for the EUR cohort, drawn as one triangle
# (diagonal included) with scores ordered by PGS_cohort_order.
corr_heatmap_eur <- all_correlations_df_eur %>%
  # Keep pairs where model_1 does not come after model_2 in the cohort order;
  # scores absent from PGS_cohort_order give NA here and are dropped by filter().
  filter(as.numeric(factor(model_1, levels = PGS_cohort_order)) <=
           as.numeric(factor(model_2, levels = PGS_cohort_order))) %>%
  arrange(factor(model_2, levels = PGS_cohort_order)) %>%
  arrange(factor(model_1, levels = PGS_cohort_order)) %>%
  # fct_inorder() turns the row order established above into the axis order
  ggplot(aes(x = fct_inorder(model_2), y = fct_inorder(model_1), fill = r)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "skyblue", mid = "white", high = "orange",
                       limits = c(-1, 1), name = "*r*") +
  # No shape aesthetic is mapped in this plot, so this scale is currently inert.
  scale_shape_manual(values = c(8), na.translate = FALSE, name = NULL) +
  guides(fill = guide_colourbar(order = 1)) +
  labs(x = NULL,
       y = NULL,
       title = "PMBB v3 EUR CKD PGS Percentile Correlation") +
  coord_equal() +
  #facet_nested(group1 + fct_inorder(model_1) ~ group2 + fct_inorder(model_2),
  #               switch = "both", space = "free", scales = "free") +
  theme_bw(base_size = 16) +
  theme(panel.grid.major = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1),
        legend.title = ggtext::element_markdown(),
        plot.title.position = "plot")

corr_heatmap_eur
# Pass the plot explicitly so the saved file cannot pick up a different
# "last plot" if cells are run out of order.
ggsave('output/plots/PMBB_v3.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_correlation.heatmap.png',
       plot = corr_heatmap_eur,
       width = 10,
       height = 8,
       dpi = 300)

### afr

In [None]:
# heat map
# Pairwise percentile correlations for the AFR cohort, drawn as one triangle
# (diagonal included) with scores ordered by PGS_cohort_order.
corr_heatmap_afr <- all_correlations_df_afr %>%
  # Keep pairs where model_1 does not come after model_2 in the cohort order;
  # scores absent from PGS_cohort_order give NA here and are dropped by filter().
  filter(as.numeric(factor(model_1, levels = PGS_cohort_order)) <=
           as.numeric(factor(model_2, levels = PGS_cohort_order))) %>%
  arrange(factor(model_2, levels = PGS_cohort_order)) %>%
  arrange(factor(model_1, levels = PGS_cohort_order)) %>%
  # fct_inorder() turns the row order established above into the axis order
  ggplot(aes(x = fct_inorder(model_2), y = fct_inorder(model_1), fill = r)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "skyblue", mid = "white", high = "orange",
                       limits = c(-1, 1), name = "*r*") +
  # No shape aesthetic is mapped in this plot, so this scale is currently inert.
  scale_shape_manual(values = c(8), na.translate = FALSE, name = NULL) +
  guides(fill = guide_colourbar(order = 1)) +
  labs(x = NULL,
       y = NULL,
       title = "PMBB v3 AFR CKD PGS Percentile Correlation") +
  coord_equal() +
  theme_bw(base_size = 16) +
  theme(panel.grid.major = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1),
        legend.title = ggtext::element_markdown(),
        plot.title.position = "plot")

corr_heatmap_afr
# Pass the plot explicitly so the saved file cannot pick up a different
# "last plot" if cells are run out of order.
ggsave('output/plots/PMBB_v3.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_correlation.heatmap.png',
       plot = corr_heatmap_afr,
       width = 10,
       height = 8,
       dpi = 300)

## look at correlation coefficients

### all

In [None]:
# Column summaries for ALL-cohort pairs with r below 0.99.
summary(filter(all_correlations_df, r < 0.99))

### eur

In [None]:
# Column summaries for EUR-cohort pairs with r below 0.99.
summary(filter(all_correlations_df_eur, r < 0.99))

### afr

In [None]:
# Column summaries for AFR-cohort pairs with r below 0.99.
summary(filter(all_correlations_df_afr, r < 0.99))

# make number of equivalent scores plot

## read in file

In [None]:
# Counts of equivalent scores per cohort (comma-separated input).
n_equiv_scores <- read.csv(file = 'input/ckd_equivalent_scores.csv')
n_equiv_scores

## convert to long form

In [None]:
# Reshape the equivalent-score counts to long form and derive plotting labels.
n_equiv_scores_long <- n_equiv_scores %>%
  pivot_longer(cols = c('Statistically_Equivalent', 'ROPE_0.005', 'ROPE_0.01', 'ROPE_0.02'),
               names_to = "Equiv_Metric",
               values_to = "N_Scores") %>%
  mutate(
    Equiv_Metric = factor(Equiv_Metric,
                          levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")),
    # fixed = TRUE: the "." is matched literally. As a regex it is a wildcard,
    # so 'ALL.' would also rewrite "ALL" followed by any other character.
    # NOTE(review): assumes cohort labels in the CSV use a literal dot
    # (e.g. "ALL.") -- confirm against the input file.
    Cohort = gsub('ALL.', 'ALL_indiv.', Cohort, fixed = TRUE),
    Cohort = gsub('EUR.', 'EUR_indiv.', Cohort, fixed = TRUE),
    Cohort = gsub('AFR.', 'AFR_indiv.', Cohort, fixed = TRUE),
    # GROUP: everything after the first underscore; BIOBANK: everything before it
    GROUP = sub('^[^_]*_', '', Cohort),
    BIOBANK = gsub('_.*', '', Cohort)
  )
head(n_equiv_scores_long)

## make plot

In [None]:
# Line graph of the number of equivalent scores per equivalence metric, one
# line per BIOBANK value and one panel per GROUP value.
n_equiv_scores_plot <- ggplot(n_equiv_scores_long,
                              aes(x = Equiv_Metric, y = N_Scores,
                                  color = BIOBANK, group = BIOBANK)) +
  geom_point() +
  geom_line() +
  facet_wrap(~ GROUP, nrow = 1) +  # separate panel for each GROUP
  labs(title = "Number of Equivalent CKD Scores",
       x = "Equivalence Metric",
       y = "N Scores",
       color = "Cohort") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        plot.margin = unit(c(1, 1, 2, 2), "cm"))

n_equiv_scores_plot
# Pass the plot explicitly so the saved file cannot pick up a different
# "last plot" if cells are run out of order.
# NOTE(review): only width is fixed; height falls back to the current
# graphics device size.
ggsave('output/plots/n_equivalent_scores.line_graph.png',
       plot = n_equiv_scores_plot,
       width = 20,
       dpi = 300,
       bg = "white")