# load packages

## install

In [None]:
install.packages('bigreadr')
install.packages('R.utils')
install.packages('tidyposterior')
install.packages('tidymodels')
install.packages('rlang')
install.packages('probably')
install.packages('ggpubr')
install.packages('kableExtra')
install.packages('gginnards')
install.packages('irr')
install.packages('simpleboot')
install.packages('CalibratR')
install.packages('eulerr')
install.packages('ggtext')

## load

In [None]:
library(rlang)
library(bigreadr)
library(R.utils)
library(tidyverse)
library(tidyposterior)
library(tidymodels)
library(parsnip)
library(probably)
library(ggpubr)
library(colorspace)
library(kableExtra)
library(IRdisplay)
library(ggsci)
library(knitr)
library(gginnards)
library(irr)
library(simpleboot)
library(boot)
library(reshape2)
library(CalibratR)
library(data.table)
library(eulerr)
library(patchwork)
library(ggtext)

# read in input files

## score

In [None]:
# Copy the AOU scoring output folder from the workspace bucket into the
# working directory; the score table is read from AOU/score/ afterwards.
my_bucket <- Sys.getenv("WORKSPACE_BUCKET")
# gsutil cp requires a destination argument. Without the trailing " ." the
# command is rejected and nothing is copied.
system(paste0("gsutil cp -R ", my_bucket, "/CKD/output/AOU ."), intern = TRUE)

In [None]:
# Load the per-individual PGS table, then report how many distinct scores and
# individuals it holds and list the score IDs.
score <- fread("AOU/score/AOU_pgs.txt.gz", sep = "\t")
head(score)
length(unique(score[["PGS"]]))
length(unique(score[["IID"]]))
unique(score[["PGS"]])

## pheno/covar

In [None]:
# Fetch the phenotype/covariate table from the workspace bucket into the
# working directory.
my_bucket <- Sys.getenv("WORKSPACE_BUCKET")
system(
  paste0(
    "gsutil cp -R ",
    my_bucket,
    "/CKD/pheno/AOU_v8.CKD_PGS.YK_phenotyping.phenotype_covariates.txt ."
  ),
  intern = TRUE
)

In [None]:
# Phenotype and covariate table (tab-separated).
pheno_covar <- fread(
  "AOU_v8.CKD_PGS.YK_phenotyping.phenotype_covariates.txt",
  sep = "\t"
)
head(pheno_covar)

## pop

In [None]:
# Population-similarity assignments per individual.
# NOTE(review): this path starts with CKD_AOU/, whereas the score table is read
# from AOU/score/ — confirm which local directory the bucket copy lands in.
pop <- fread(
  "CKD_AOU/AOU/score/AOU_popsimilarity.txt.gz",
  sep = "\t"
)
head(pop)
colnames(pop)

## PGS IDs grouped by training sets

In [None]:
# Copy every CKD.PGS_list.Training* file (score IDs grouped by training set)
# from the bucket into the working directory.
my_bucket <- Sys.getenv("WORKSPACE_BUCKET")
system(
  paste0("gsutil cp -R ", my_bucket, "/CKD/input/CKD.PGS_list.Training* .")
)

In [None]:
# Score list for the UKBB training set; the file has no header row.
ukbb <- read.csv("CKD.PGS_list.Training.UKBB.txt", header = FALSE)
head(ukbb)

In [None]:
# Score list for the CKDGen EUR-only training set; the file has no header row.
ckdgen_eur <- read.csv(
  "CKD.PGS_list.Training.CKDGen.EUR_only.txt",
  header = FALSE
)
head(ckdgen_eur)

In [None]:
# Score list for the CKDGen multi-ancestry training set; no header row.
ckdgen_multi <- read.csv(
  "CKD.PGS_list.Training.CKDGen.multi_ancestry.txt",
  header = FALSE
)
head(ckdgen_multi)

## PRScs iterations

In [None]:
# Copy the PRScs iteration outputs from the workspace bucket into the working
# directory (-m runs the copy in parallel).
my_bucket <- Sys.getenv("WORKSPACE_BUCKET")
# paste0, not paste: paste()'s default sep = " " put a space between the bucket
# and the object path, so gsutil received a bare bucket URL and a local
# "/CKD/..." path as two separate sources instead of one object path.
system(
  paste0("gsutil -m cp -R ", my_bucket, "/CKD/output/PRScs_iterations/* .")
)

In [None]:
# AFR eGFR PRScs scores; only the ID, score name and Z_norm2 columns are read.
afr_egfr <- fread(
  "AFR.eGFR.flip/AOU_pgs.txt.gz",
  select = c("IID", "PGS", "Z_norm2"),
  sep = "\t"
)
head(afr_egfr)

In [None]:
# EUR eGFR PRScs scores; only the ID, score name and Z_norm2 columns are read.
eur_egfr <- fread(
  "EUR.eGFR.flip/AOU_pgs.txt.gz",
  select = c("IID", "PGS", "Z_norm2"),
  sep = "\t"
)
head(eur_egfr)

In [None]:
# First META eGFR PRScs score file; only IID, PGS and Z_norm2 are read.
meta_egfr1 <- fread(
  "META.eGFR.flip/AOU_pgs.txt.gz",
  select = c("IID", "PGS", "Z_norm2"),
  sep = "\t"
)
head(meta_egfr1)

In [None]:
# Second META eGFR PRScs score file; only IID, PGS and Z_norm2 are read.
meta_egfr2 <- fread(
  "META.eGFR.flip.2/AOU_pgs.txt.gz",
  select = c("IID", "PGS", "Z_norm2"),
  sep = "\t"
)
head(meta_egfr2)

# clean scores

## subset and rename

In [None]:
# Drop the sampleset and FID columns and strip the "_hmPOS_GRCh38" suffix from
# the score IDs.
score_sub <- score %>%
  select(-sampleset, -FID) %>%
  mutate(PGS = gsub("_hmPOS_GRCh38", "", PGS))
head(score_sub)

In [None]:
# Rename person_id to IID so it can serve as the join key, store it as
# character, and make CKD a factor.
pheno_covar_rename <- pheno_covar %>%
  rename(IID = person_id) %>%
  mutate(
    IID = as.character(IID),
    CKD = as.factor(CKD)
  )
head(pheno_covar_rename)

In [None]:
# Keep only the individual ID and its most-similar-population label.
pop_sub <- select(pop, IID, MostSimilarPop)
unique(pop_sub[["MostSimilarPop"]])
head(pop_sub)

## merge

In [None]:
# Attach phenotypes/covariates and population labels to every score row.
# Inner joins keep only individuals present in all three tables.
all_score <- inner_join(score_sub, pheno_covar_rename, by = "IID") %>%
  inner_join(pop_sub, by = "IID")

# Row and individual counts before and after the joins, to see what was dropped.
nrow(all_score)
length(unique(all_score[["IID"]]))
nrow(score_sub)
length(unique(score_sub[["IID"]]))
nrow(pheno_covar_rename)
nrow(pop_sub)
head(all_score)

### PRScs iterations

In [None]:
# AFR eGFR scores joined with phenotypes/covariates and population labels.
afr_egfr_merge <- inner_join(afr_egfr, pheno_covar_rename, by = "IID") %>%
  inner_join(pop_sub, by = "IID")
head(afr_egfr_merge)

In [None]:
# EUR eGFR scores joined with phenotypes/covariates and population labels.
eur_egfr_merge <- inner_join(eur_egfr, pheno_covar_rename, by = "IID") %>%
  inner_join(pop_sub, by = "IID")
head(eur_egfr_merge)

In [None]:
# Stack the two META eGFR score tables, then join phenotypes/covariates and
# population labels.
meta_egfr_merge <- rbind(meta_egfr1, meta_egfr2) %>%
  inner_join(pheno_covar_rename, by = "IID") %>%
  inner_join(pop_sub, by = "IID")
head(meta_egfr_merge)

## subset to EUR and AFR only

In [None]:
# Rows whose population label contains "EUR".
all_score_eur <- filter(all_score, grepl("EUR", MostSimilarPop))
unique(all_score_eur[["MostSimilarPop"]])
head(all_score_eur)

In [None]:
# Rows whose population label contains "AFR".
all_score_afr <- filter(all_score, grepl("AFR", MostSimilarPop))
unique(all_score_afr[["MostSimilarPop"]])
head(all_score_afr)

# get summary data

## case/control

In [None]:
# Count individuals per CKD status. all_score has one row per individual per
# score, so IID/CKD pairs are de-duplicated first.
all_score %>%
  select(IID, CKD) %>%
  distinct() %>%
  group_by(CKD) %>%
  summarise(n())

## age

In [None]:
# Summary of AGE over de-duplicated IID/AGE pairs.
all_score %>%
  select(IID, AGE) %>%
  distinct() %>%
  select(-IID) %>%
  summary()

In [None]:
# Density of AGE over de-duplicated IID/AGE pairs, saved as a PNG.
all_score %>%
  select(IID, AGE) %>%
  distinct() %>%
  ggplot(aes(x = AGE)) +
  # The fill is a fixed colour, so it is set on the geom. Inside aes() the
  # string "red" was mapped as a one-level category and drawn in the default
  # palette colour rather than red.
  geom_density(fill = "red", alpha = 0.5) +
  ggtitle("AOU CKD PGS Age Distribution") +
  theme(legend.position = "none")

# ggsave() with no plot argument writes the most recent plot.
ggsave("AOU.CKD.YK_Phenotyping.PGS.all_samples.age_distribution.png")

In [None]:
# Mean age within each CKD group (missing ages ignored).
all_score %>%
  select(IID, CKD, AGE) %>%
  distinct() %>%
  group_by(CKD) %>%
  summarize(Mean_Age = mean(AGE, na.rm = TRUE))

In [None]:
# Age densities overlaid by CKD status, saved as a PNG.
all_score %>%
  select(IID, CKD, AGE) %>%
  distinct() %>%
  ggplot(aes(x = AGE, color = CKD, fill = CKD)) +
  geom_density(alpha = 0.5) +
  ggtitle("AOU CKD PGS Age Distribution")

ggsave("AOU.CKD.YK_Phenotyping.PGS.case_control.age_distribution.png")

## age less than vs greater than or equal to 60

In [None]:
# Counts by CKD status and age bin (< 60 vs not). After summarize() the data
# are still grouped by CKD, so Percent is computed within each CKD group.
all_score %>%
  select(IID, AGE, CKD) %>%
  distinct() %>%
  mutate(AGE_less_60 = ifelse(AGE < 60, "Yes", "No")) %>%
  group_by(CKD, AGE_less_60) %>%
  summarize(Count = n()) %>%
  mutate(Percent = (Count / sum(Count)) * 100)

In [None]:
# Bar chart of the age bin (< 60 vs not), filled by CKD status; saved as a PNG.
all_score %>%
  select(IID, AGE, CKD) %>%
  distinct() %>%
  mutate(AGE_less_60 = ifelse(AGE < 60, "Yes", "No")) %>%
  ggplot(aes(x = AGE_less_60, fill = CKD)) +
  geom_bar() +
  ggtitle("AOU CKD PGS Age Less Than 60 Distribution")

ggsave("AOU.CKD.YK_Phenotyping.PGS.case_control.age_less_60_distribution.png")

## sex

In [None]:
# Count and overall percentage of individuals per SEX value.
all_score %>%
  select(IID, SEX, CKD) %>%
  distinct() %>%
  group_by(SEX) %>%
  summarize(Count = n()) %>%
  mutate(Percent = (Count / sum(Count)) * 100)

In [None]:
# Bar chart of sex; codes 1 and 2 are relabelled Male and Female for display.
all_score %>%
  select(IID, SEX) %>%
  distinct() %>%
  mutate(SEX = gsub(2, "Female", gsub(1, "Male", SEX))) %>%
  ggplot(aes(x = SEX, fill = SEX)) +
  geom_bar() +
  ggtitle("AOU CKD PGS Sex Distribution")

ggsave("AOU.CKD.YK_Phenotyping.PGS.all_samples.sex_distribution.png")

In [None]:
# Counts by CKD status and SEX. After summarize() the data are still grouped
# by CKD, so Percent is computed within each CKD group.
all_score %>%
  select(IID, SEX, CKD) %>%
  distinct() %>%
  group_by(CKD, SEX) %>%
  summarize(Count = n()) %>%
  mutate(Percent = (Count / sum(Count)) * 100)

In [None]:
# Bar chart of sex filled by CKD status; codes 1 and 2 are relabelled Male and
# Female for display.
all_score %>%
  select(IID, SEX, CKD) %>%
  distinct() %>%
  mutate(SEX = gsub(2, "Female", gsub(1, "Male", SEX))) %>%
  ggplot(aes(x = SEX, fill = CKD)) +
  geom_bar() +
  ggtitle("AOU CKD PGS Sex Distribution")

ggsave("AOU.CKD.YK_Phenotyping.PGS.case_control.sex_distribution.png")

## ancestry

In [None]:
# Count and overall percentage of individuals per population label.
all_score %>%
  select(IID, MostSimilarPop) %>%
  distinct() %>%
  group_by(MostSimilarPop) %>%
  summarize(Count = n()) %>%
  mutate(Percent = (Count / sum(Count)) * 100)

In [None]:
# Bar chart of population labels (column shown as ANCESTRY); saved as a PNG.
all_score %>%
  select(IID, MostSimilarPop) %>%
  distinct() %>%
  rename(ANCESTRY = MostSimilarPop) %>%
  ggplot(aes(x = ANCESTRY, fill = ANCESTRY)) +
  geom_bar() +
  ggtitle("AOU CKD PGS Ancestry Distribution")

ggsave("AOU.CKD.YK_Phenotyping.PGS.all_samples.ancestry_distribution.png")

In [None]:
# Counts by population label and CKD status. After summarize() the data are
# still grouped by MostSimilarPop, so Percent is computed within each population.
all_score %>%
  select(IID, CKD, MostSimilarPop) %>%
  distinct() %>%
  group_by(MostSimilarPop, CKD) %>%
  summarize(Count = n()) %>%
  mutate(Percent = (Count / sum(Count)) * 100)

In [None]:
# Bar chart of population labels filled by CKD status; saved as a PNG.
all_score %>%
  select(IID, MostSimilarPop, CKD) %>%
  distinct() %>%
  rename(ANCESTRY = MostSimilarPop) %>%
  ggplot(aes(x = ANCESTRY, fill = CKD)) +
  geom_bar() +
  ggtitle("AOU CKD PGS Ancestry Distribution")

ggsave("AOU.CKD.YK_Phenotyping.PGS.case_control.ancestry_distribution.png")

## T2D

In [None]:
# Counts by T2D and CKD status. After summarize() the data are still grouped
# by T2D, so Percent is computed within each T2D group.
all_score %>%
  select(IID, CKD, T2D) %>%
  distinct() %>%
  group_by(T2D, CKD) %>%
  summarize(Count = n()) %>%
  mutate(Percent = (Count / sum(Count)) * 100)

In [None]:
# Bar chart of T2D status filled by CKD status. Codes 1 and 0 are relabelled
# Case and Control, and rows with missing values are dropped before plotting.
all_score %>%
  select(IID, T2D, CKD) %>%
  distinct() %>%
  mutate(T2D = gsub(0, "Control", gsub(1, "Case", T2D))) %>%
  na.omit() %>%
  ggplot(aes(x = T2D, fill = CKD)) +
  geom_bar() +
  ggtitle("AOU CKD PGS T2D Distribution") +
  scale_y_continuous(labels = scales::comma)

ggsave("AOU.CKD.YK_Phenotyping.PGS.case_control.t2d_distribution.png")

## HT- YK Phenotyping

In [None]:
# Counts by HT_YK and CKD status. After summarize() the data are still grouped
# by HT_YK, so Percent is computed within each HT_YK group.
all_score %>%
  select(IID, CKD, HT_YK) %>%
  distinct() %>%
  group_by(HT_YK, CKD) %>%
  summarize(Count = n()) %>%
  mutate(Percent = (Count / sum(Count)) * 100)

In [None]:
# Bar chart of hypertension (HT_YK) status filled by CKD status. Codes 1 and 0
# are relabelled Case and Control, and rows with missing values are dropped.
all_score %>%
  select(IID, HT_YK, CKD) %>%
  distinct() %>%
  rename(HT = HT_YK) %>%
  mutate(HT = gsub(0, "Control", gsub(1, "Case", HT))) %>%
  na.omit() %>%
  ggplot(aes(x = HT, fill = CKD)) +
  geom_bar() +
  ggtitle("AOU CKD PGS Hypertension Distribution") +
  scale_y_continuous(labels = scales::comma) +
  xlab("Hypertension")

ggsave("AOU.CKD.YK_Phenotyping.PGS.case_control.ht_yk_distribution.png")

# logistic regressions: Znorm2 (continuous PGS) as predictor of CKD
- add clinical covariates (T2D and/or HT) by adding them as variables in the regression model
- filter regressions by clinical covariate case control status (T2D or HT) or age binning (less than or greater than 60) by filtering the "one score" dataframe

## create score list

In [None]:
# Distinct score IDs to loop over in the regressions.
score_list <- unique(all_score[["PGS"]])
length(score_list)
score_list

## run LR on all scores for all individuals (Znorm2 + age + sex)

In [None]:
# One logistic regression per score on all individuals:
# CKD ~ Z_norm2 + AGE + SEX. The broom summaries are collected in lists keyed
# by score ID and row-bound afterwards.
all_tidy_znorm2_list <- list()
all_glance_znorm2_list <- list()

for (index in seq_along(score_list)) {
  # Named pgs_id rather than `score` so the loop does not overwrite the
  # `score` table loaded from AOU_pgs.txt.gz.
  pgs_id <- score_list[[index]]
  message(paste0("starting ", pgs_id, " at index ", index))

  # Exact match on the score ID. grepl() treated the ID as a regular
  # expression, so "." matched any character and an ID contained in a longer
  # ID would also pull in that score's rows.
  one_score <- all_score %>%
    filter(PGS == pgs_id)

  # `fit`, not `glm`, so the model object does not mask the glm() function name.
  fit <- glm(CKD ~ Z_norm2 + AGE + SEX, data = one_score, family = "binomial")

  # term, estimate, std.error, statistic, p.value, conf.low, conf.high
  # (exponentiate = TRUE: estimates and CIs are on the odds-ratio scale)
  all_tidy_znorm2_list[[pgs_id]] <-
    data.frame(broom::tidy(fit, exponentiate = TRUE, conf.int = TRUE)) %>%
    mutate(SCORE = pgs_id)

  # null.deviance, df.null, logLik, AIC, BIC, deviance, df.residual, nobs
  all_glance_znorm2_list[[pgs_id]] <-
    data.frame(broom::glance(fit)) %>%
    mutate(SCORE = pgs_id)
}

In [None]:
# Stack the per-score summaries into one table each.
all_tidy_znorm2 <- dplyr::bind_rows(all_tidy_znorm2_list)
all_glance_znorm2 <- dplyr::bind_rows(all_glance_znorm2_list)

## run LR on EUR only (Znorm2 + age + sex)

In [None]:
# One logistic regression per score on the EUR subset:
# CKD ~ Z_norm2 + AGE + SEX. The broom summaries are collected in lists keyed
# by score ID and row-bound afterwards.
all_tidy_eur_znorm2_list <- list()
all_glance_eur_znorm2_list <- list()

for (index in seq_along(score_list)) {
  # Named pgs_id rather than `score` so the loop does not overwrite the
  # `score` table loaded from AOU_pgs.txt.gz.
  pgs_id <- score_list[[index]]
  message(paste0("starting ", pgs_id, " at index ", index))

  # Exact match on the score ID. grepl() treated the ID as a regular
  # expression, so "." matched any character and an ID contained in a longer
  # ID would also pull in that score's rows.
  one_score <- all_score_eur %>%
    filter(PGS == pgs_id)

  # `fit`, not `glm`, so the model object does not mask the glm() function name.
  fit <- glm(CKD ~ Z_norm2 + AGE + SEX, data = one_score, family = "binomial")

  # term, estimate, std.error, statistic, p.value, conf.low, conf.high
  # (exponentiate = TRUE: estimates and CIs are on the odds-ratio scale)
  all_tidy_eur_znorm2_list[[pgs_id]] <-
    data.frame(broom::tidy(fit, exponentiate = TRUE, conf.int = TRUE)) %>%
    mutate(SCORE = pgs_id)

  # null.deviance, df.null, logLik, AIC, BIC, deviance, df.residual, nobs
  all_glance_eur_znorm2_list[[pgs_id]] <-
    data.frame(broom::glance(fit)) %>%
    mutate(SCORE = pgs_id)
}

In [None]:
# Stack the per-score EUR summaries into one table each.
all_tidy_eur_znorm2 <- dplyr::bind_rows(all_tidy_eur_znorm2_list)
all_glance_eur_znorm2 <- dplyr::bind_rows(all_glance_eur_znorm2_list)

## run LR on AFR only (Znorm2 + age + sex)

In [None]:
# One logistic regression per score on the AFR subset:
# CKD ~ Z_norm2 + AGE + SEX. The broom summaries are collected in lists keyed
# by score ID and row-bound afterwards.
all_tidy_afr_znorm2_list <- list()
all_glance_afr_znorm2_list <- list()

for (index in seq_along(score_list)) {
  # Named pgs_id rather than `score` so the loop does not overwrite the
  # `score` table loaded from AOU_pgs.txt.gz.
  pgs_id <- score_list[[index]]
  message(paste0("starting ", pgs_id, " at index ", index))

  # The filter step ends the pipeline here. A stray trailing %>% previously
  # piped it into the model assignment below and made the cell fail.
  # Exact match on the score ID: grepl() treated the ID as a regular
  # expression, so "." matched any character and an ID contained in a longer
  # ID would also pull in that score's rows.
  one_score <- all_score_afr %>%
    filter(PGS == pgs_id)

  # `fit`, not `glm`, so the model object does not mask the glm() function name.
  fit <- glm(CKD ~ Z_norm2 + AGE + SEX, data = one_score, family = "binomial")

  # term, estimate, std.error, statistic, p.value, conf.low, conf.high
  # (exponentiate = TRUE: estimates and CIs are on the odds-ratio scale)
  all_tidy_afr_znorm2_list[[pgs_id]] <-
    data.frame(broom::tidy(fit, exponentiate = TRUE, conf.int = TRUE)) %>%
    mutate(SCORE = pgs_id)

  # null.deviance, df.null, logLik, AIC, BIC, deviance, df.residual, nobs
  all_glance_afr_znorm2_list[[pgs_id]] <-
    data.frame(broom::glance(fit)) %>%
    mutate(SCORE = pgs_id)
}

In [None]:
# Stack the per-score AFR summaries into one table each.
all_tidy_afr_znorm2 <- dplyr::bind_rows(all_tidy_afr_znorm2_list)
all_glance_afr_znorm2 <- dplyr::bind_rows(all_glance_afr_znorm2_list)

## filter tidy df to only scores, add CI column, and remove score suffix

In [None]:
# Keep only the Z_norm2 coefficient rows and add a printable "(low, high)"
# confidence-interval column.
all_tidy_znorm2_score <- all_tidy_znorm2 %>%
  filter(term == "Z_norm2") %>%
  mutate(CI = paste0("(", conf.low, ", ", conf.high, ")"))
summary(all_tidy_znorm2_score[["p.value"]])
summary(all_tidy_znorm2_score[["estimate"]])

In [None]:
# EUR: keep only the Z_norm2 coefficient rows and add a printable
# "(low, high)" confidence-interval column.
all_tidy_eur_znorm2_score <- all_tidy_eur_znorm2 %>%
  filter(term == "Z_norm2") %>%
  mutate(CI = paste0("(", conf.low, ", ", conf.high, ")"))
summary(all_tidy_eur_znorm2_score[["p.value"]])
summary(all_tidy_eur_znorm2_score[["estimate"]])

In [None]:
# AFR: keep only the Z_norm2 coefficient rows and add a printable
# "(low, high)" confidence-interval column.
all_tidy_afr_znorm2_score <- all_tidy_afr_znorm2 %>%
  filter(term == "Z_norm2") %>%
  mutate(CI = paste0("(", conf.low, ", ", conf.high, ")"))
summary(all_tidy_afr_znorm2_score[["p.value"]])
summary(all_tidy_afr_znorm2_score[["estimate"]])

## export

In [None]:
# Write the all-individuals Z_norm2 results as a tab-separated file with a
# header, no row names and no quoting.
write.table(
  all_tidy_znorm2_score,
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Write the EUR Z_norm2 results as a tab-separated file with a header, no row
# names and no quoting.
write.table(
  all_tidy_eur_znorm2_score,
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Write the AFR Z_norm2 results as a tab-separated file with a header, no row
# names and no quoting.
write.table(
  all_tidy_afr_znorm2_score,
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

# logistic regressions: classification above/below 98th percentile as predictor of CKD
- add clinical covariates (T2D and/or HT) by adding them as variables in the regression model
- filter regressions by clinical covariate case control status (T2D or HT) or age binning (less than or greater than 60) by filtering the "one score" dataframe

## compute percentile and pseudo case col

In [None]:
# ntile is the standard-normal CDF of Z_norm2 on a 0-100 scale (a theoretical
# percentile, not an empirical rank). PRED_CASE is 1 at or above 98, 0 below,
# and NA where Z_norm2 is missing.
all_score_ntile <- all_score %>%
  mutate(
    ntile = 100 * pnorm(Z_norm2),
    PRED_CASE = as.numeric(ntile >= 98)
  )

In [None]:
# EUR subset: ntile is the standard-normal CDF of Z_norm2 on a 0-100 scale.
# PRED_CASE is 1 at or above 98, 0 below, and NA where Z_norm2 is missing.
all_score_eur_ntile <- all_score_eur %>%
  mutate(
    ntile = 100 * pnorm(Z_norm2),
    PRED_CASE = as.numeric(ntile >= 98)
  )

In [None]:
# AFR subset: ntile is the standard-normal CDF of Z_norm2 on a 0-100 scale.
# PRED_CASE is 1 at or above 98, 0 below, and NA where Z_norm2 is missing.
all_score_afr_ntile <- all_score_afr %>%
  mutate(
    ntile = 100 * pnorm(Z_norm2),
    PRED_CASE = as.numeric(ntile >= 98)
  )

## create score list

In [None]:
# Distinct score IDs to loop over in the percentile regressions.
score_list <- unique(all_score[["PGS"]])
length(score_list)

## regressions- all individuals

In [None]:
# One logistic regression per score on all individuals, with the
# 98th-percentile flag as predictor: CKD ~ PRED_CASE + AGE + SEX.
# The list names are reused from the continuous-score run and reset here.
all_tidy_znorm2_list <- list()
all_glance_znorm2_list <- list()

for (index in seq_along(score_list)) {
  # Named pgs_id rather than `score` so the loop does not overwrite the
  # `score` table loaded from AOU_pgs.txt.gz.
  pgs_id <- score_list[[index]]
  message(paste0("starting ", pgs_id, " at index ", index))

  # Exact match on the score ID. grepl() treated the ID as a regular
  # expression, so "." matched any character and an ID contained in a longer
  # ID would also pull in that score's rows.
  one_score <- all_score_ntile %>%
    filter(PGS == pgs_id)

  # `fit`, not `glm`, so the model object does not mask the glm() function name.
  fit <- glm(CKD ~ PRED_CASE + AGE + SEX, data = one_score, family = "binomial")

  # term, estimate, std.error, statistic, p.value, conf.low, conf.high
  # (exponentiate = TRUE: estimates and CIs are on the odds-ratio scale)
  all_tidy_znorm2_list[[pgs_id]] <-
    data.frame(broom::tidy(fit, exponentiate = TRUE, conf.int = TRUE)) %>%
    mutate(SCORE = pgs_id)

  # null.deviance, df.null, logLik, AIC, BIC, deviance, df.residual, nobs
  all_glance_znorm2_list[[pgs_id]] <-
    data.frame(broom::glance(fit)) %>%
    mutate(SCORE = pgs_id)
}

In [None]:
# Stack the per-score 98th-percentile summaries into one table each.
all_tidy_98th <- dplyr::bind_rows(all_tidy_znorm2_list)
all_glance_98th <- dplyr::bind_rows(all_glance_znorm2_list)

## regressions- eur

In [None]:
# One logistic regression per score on the EUR subset, with the
# 98th-percentile flag as predictor: CKD ~ PRED_CASE + AGE + SEX.
# The list names are reused from the continuous-score run and reset here.
all_tidy_eur_znorm2_list <- list()
all_glance_eur_znorm2_list <- list()

for (index in seq_along(score_list)) {
  # Named pgs_id rather than `score` so the loop does not overwrite the
  # `score` table loaded from AOU_pgs.txt.gz.
  pgs_id <- score_list[[index]]
  message(paste0("starting ", pgs_id, " at index ", index))

  # Exact match on the score ID. grepl() treated the ID as a regular
  # expression, so "." matched any character and an ID contained in a longer
  # ID would also pull in that score's rows.
  one_score <- all_score_eur_ntile %>%
    filter(PGS == pgs_id)

  # `fit`, not `glm`, so the model object does not mask the glm() function name.
  fit <- glm(CKD ~ PRED_CASE + AGE + SEX, data = one_score, family = "binomial")

  # term, estimate, std.error, statistic, p.value, conf.low, conf.high
  # (exponentiate = TRUE: estimates and CIs are on the odds-ratio scale)
  all_tidy_eur_znorm2_list[[pgs_id]] <-
    data.frame(broom::tidy(fit, exponentiate = TRUE, conf.int = TRUE)) %>%
    mutate(SCORE = pgs_id)

  # null.deviance, df.null, logLik, AIC, BIC, deviance, df.residual, nobs
  all_glance_eur_znorm2_list[[pgs_id]] <-
    data.frame(broom::glance(fit)) %>%
    mutate(SCORE = pgs_id)
}

In [None]:
# Stack the per-score EUR 98th-percentile summaries into one table each.
all_tidy_eur_98th <- dplyr::bind_rows(all_tidy_eur_znorm2_list)
all_glance_eur_98th <- dplyr::bind_rows(all_glance_eur_znorm2_list)

## regressions- afr

In [None]:
# One logistic regression per score on the AFR subset, with the
# 98th-percentile flag as predictor: CKD ~ PRED_CASE + AGE + SEX.
# The list names are reused from the continuous-score run and reset here.
all_tidy_afr_znorm2_list <- list()
all_glance_afr_znorm2_list <- list()

for (index in seq_along(score_list)) {
  # Named pgs_id rather than `score` so the loop does not overwrite the
  # `score` table loaded from AOU_pgs.txt.gz.
  pgs_id <- score_list[[index]]
  message(paste0("starting ", pgs_id, " at index ", index))

  # Exact match on the score ID. grepl() treated the ID as a regular
  # expression, so "." matched any character and an ID contained in a longer
  # ID would also pull in that score's rows.
  one_score <- all_score_afr_ntile %>%
    filter(PGS == pgs_id)

  # `fit`, not `glm`, so the model object does not mask the glm() function name.
  fit <- glm(CKD ~ PRED_CASE + AGE + SEX, data = one_score, family = "binomial")

  # term, estimate, std.error, statistic, p.value, conf.low, conf.high
  # (exponentiate = TRUE: estimates and CIs are on the odds-ratio scale)
  all_tidy_afr_znorm2_list[[pgs_id]] <-
    data.frame(broom::tidy(fit, exponentiate = TRUE, conf.int = TRUE)) %>%
    mutate(SCORE = pgs_id)

  # null.deviance, df.null, logLik, AIC, BIC, deviance, df.residual, nobs
  all_glance_afr_znorm2_list[[pgs_id]] <-
    data.frame(broom::glance(fit)) %>%
    mutate(SCORE = pgs_id)
}

In [None]:
# Stack the per-score AFR 98th-percentile summaries into one table each.
all_tidy_afr_98th <- dplyr::bind_rows(all_tidy_afr_znorm2_list)
all_glance_afr_98th <- dplyr::bind_rows(all_glance_afr_znorm2_list)

## concatenate

In [None]:
# Keep only the PRED_CASE coefficient rows and add a printable "(low, high)"
# confidence-interval column.
all_tidy_98th_score <- all_tidy_98th %>%
  filter(term == "PRED_CASE") %>%
  mutate(CI = paste0("(", conf.low, ", ", conf.high, ")"))
summary(all_tidy_98th_score[["p.value"]])
summary(all_tidy_98th_score[["estimate"]])

In [None]:
# EUR: keep only the PRED_CASE coefficient rows and add a printable
# "(low, high)" confidence-interval column.
all_tidy_eur_98th_score <- all_tidy_eur_98th %>%
  filter(term == "PRED_CASE") %>%
  mutate(CI = paste0("(", conf.low, ", ", conf.high, ")"))
summary(all_tidy_eur_98th_score[["p.value"]])
summary(all_tidy_eur_98th_score[["estimate"]])

In [None]:
# AFR: keep only the PRED_CASE coefficient rows and add a printable
# "(low, high)" confidence-interval column.
all_tidy_afr_98th_score <- all_tidy_afr_98th %>%
  filter(term == "PRED_CASE") %>%
  mutate(CI = paste0("(", conf.low, ", ", conf.high, ")"))
summary(all_tidy_afr_98th_score[["p.value"]])
summary(all_tidy_afr_98th_score[["estimate"]])

## export

In [None]:
# Write the all-individuals 98th-percentile results as a tab-separated file
# with a header, no row names and no quoting.
write.table(
  all_tidy_98th_score,
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Write the EUR 98th-percentile results as a tab-separated file with a header,
# no row names and no quoting.
write.table(
  all_tidy_eur_98th_score,
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Write the AFR 98th-percentile results as a tab-separated file with a header,
# no row names and no quoting.
write.table(
  all_tidy_afr_98th_score,
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

# assess effect size

## read in input files (if needed)

In [None]:
# Fetch previously exported regression tables from the bucket's performance
# folder into the working directory.
my_bucket <- Sys.getenv("WORKSPACE_BUCKET")
# "PGS_Znorm2*.glm" (extra wildcard) so the
# ...PGS_Znorm2.98th_percentile.glm.score.tidy.txt tables that are read
# afterwards are matched too. The previous pattern only matched names with
# ".glm" directly after "PGS_Znorm2".
# NOTE(review): assumes the 98th-percentile tables sit in the same bucket
# folder — confirm.
system(
  paste0(
    "gsutil cp -R ",
    my_bucket,
    "/CKD/output/performance/AOU.*.PGS_Znorm2*.glm.score.tidy.txt ."
  ),
  intern = TRUE
)

### all

#### no clinical covariates

In [None]:
# ALL individuals, continuous Z_norm2 model (per file name); dim() confirms the load.
all_tidy_znorm2_score <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_znorm2_score)

In [None]:
# ALL individuals, 98th-percentile model (per file name); dim() confirms the load.
all_tidy_98th_score <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_98th_score)

#### clinical covariate regression models

##### znorm2

In [None]:
# ALL, Z_norm2, T2D-only clinical covariate model (per file name).
all_tidy_znorm2_score_clinical_t2d <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.clinical_covariates.T2D_only.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_znorm2_score_clinical_t2d)

In [None]:
# ALL, Z_norm2, HT_YK-only clinical covariate model (per file name).
all_tidy_znorm2_score_clinical_ht_yk <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.clinical_covariates.HT_YK_only.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_znorm2_score_clinical_ht_yk)

In [None]:
# ALL, Z_norm2, HT_YK + T2D clinical covariate model (per file name).
all_tidy_znorm2_score_clinical_ht_yk_t2d <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.clinical_covariates.HT_YK.T2D.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_znorm2_score_clinical_ht_yk_t2d)

##### 98th percentile

In [None]:
# ALL, 98th percentile, HT_YK + T2D clinical covariate model (per file name).
all_tidy_98th_score_clinical_ht_yk_t2d <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.clinical_covariates.HT_YK.T2D.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_98th_score_clinical_ht_yk_t2d)

In [None]:
# ALL, 98th percentile, T2D-only clinical covariate model (per file name).
all_tidy_98th_score_clinical_t2d <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.clinical_covariates.T2D_only.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_98th_score_clinical_t2d)

In [None]:
# ALL, 98th percentile, HT_YK-only clinical covariate model (per file name).
all_tidy_98th_score_clinical_ht_yk <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.clinical_covariates.HT_YK_only.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_98th_score_clinical_ht_yk)

#### clinical covariates- subset by cases and controls

##### znorm2

###### t2d

In [None]:
# ALL, Z_norm2, restricted to T2D cases (per file name).
all_tidy_znorm2_score_clinical_t2d_case <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_znorm2_score_clinical_t2d_case)

In [None]:
# ALL, Z_norm2, restricted to T2D controls (per file name).
all_tidy_znorm2_score_clinical_t2d_control <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_znorm2_score_clinical_t2d_control)

###### age

In [None]:
# ALL, Z_norm2, restricted to age < 60 (per file name).
all_tidy_znorm2_score_clinical_age_less_60 <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.AGE_less_60.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_znorm2_score_clinical_age_less_60)

In [None]:
# ALL, Z_norm2, restricted to age >= 60 (per file name).
all_tidy_znorm2_score_clinical_age_greater_equal_60 <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.AGE_greater_equal_60.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_znorm2_score_clinical_age_greater_equal_60)

###### HT YK

In [None]:
# ALL, Z_norm2, restricted to HT_YK cases (per file name).
all_tidy_znorm2_score_clinical_ht_yk_case <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_znorm2_score_clinical_ht_yk_case)

In [None]:
# ALL, Z_norm2, restricted to HT_YK controls (per file name).
all_tidy_znorm2_score_clinical_ht_yk_control <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_znorm2_score_clinical_ht_yk_control)

##### 98th percentile

###### t2d

In [None]:
# ALL, 98th percentile, restricted to T2D cases (per file name).
all_tidy_98th_score_clinical_t2d_case <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_98th_score_clinical_t2d_case)

In [None]:
# ALL, 98th percentile, restricted to T2D controls (per file name).
all_tidy_98th_score_clinical_t2d_control <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_98th_score_clinical_t2d_control)

###### age

In [None]:
# ALL, 98th percentile, restricted to age < 60 (per file name).
all_tidy_98th_score_clinical_age_less_60 <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.AGE_less_60.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_98th_score_clinical_age_less_60)

In [None]:
# ALL, 98th percentile, restricted to age >= 60 (per file name).
all_tidy_98th_score_clinical_age_greater_equal_60 <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.AGE_greater_equal_60.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_98th_score_clinical_age_greater_equal_60)

###### HT YK

In [None]:
# ALL, 98th percentile, restricted to HT_YK cases (per file name).
all_tidy_98th_score_clinical_ht_yk_case <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_98th_score_clinical_ht_yk_case)

In [None]:
# ALL, 98th percentile, restricted to HT_YK controls (per file name).
all_tidy_98th_score_clinical_ht_yk_control <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_98th_score_clinical_ht_yk_control)

### eur

#### no clinical covariates

In [None]:
# EUR, continuous Z_norm2 model (per file name); dim() confirms the load.
all_tidy_eur_znorm2_score <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_znorm2_score)

In [None]:
# EUR, 98th-percentile model (per file name); dim() confirms the load.
all_tidy_eur_98th_score <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_98th_score)

#### clinical covariate regression models

##### znorm2

In [None]:
# EUR, Z_norm2, HT_YK + T2D clinical covariate model (per file name).
all_tidy_eur_znorm2_score_clinical_ht_yk_t2d <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.clinical_covariates.HT_YK.T2D.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_znorm2_score_clinical_ht_yk_t2d)

In [None]:
# EUR, Z_norm2, T2D-only clinical covariate model (per file name).
all_tidy_eur_znorm2_score_clinical_t2d <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.clinical_covariates.T2D_only.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_znorm2_score_clinical_t2d)

In [None]:
# EUR, Z_norm2, HT_YK-only clinical covariate model (per file name).
all_tidy_eur_znorm2_score_clinical_ht_yk <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.clinical_covariates.HT_YK_only.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_znorm2_score_clinical_ht_yk)

##### 98th percentile

In [None]:
# EUR, 98th percentile, HT_YK + T2D clinical covariate model (per file name).
all_tidy_eur_98th_score_clinical_ht_yk_t2d <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.clinical_covariates.HT_YK.T2D.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_98th_score_clinical_ht_yk_t2d)

In [None]:
# EUR, 98th percentile, T2D-only clinical covariate model (per file name).
all_tidy_eur_98th_score_clinical_t2d <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.clinical_covariates.T2D_only.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_98th_score_clinical_t2d)

In [None]:
# EUR, 98th percentile, HT_YK-only clinical covariate model (per file name).
all_tidy_eur_98th_score_clinical_ht_yk <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.clinical_covariates.HT_YK_only.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_98th_score_clinical_ht_yk)

#### clinical covariates- subset by cases and controls

##### znorm2

###### T2D

In [None]:
# EUR, Z_norm2, restricted to T2D cases (per file name).
all_tidy_eur_znorm2_score_clinical_t2d_case <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_znorm2_score_clinical_t2d_case)

In [None]:
# EUR, Z_norm2, restricted to T2D controls (per file name).
all_tidy_eur_znorm2_score_clinical_t2d_control <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_znorm2_score_clinical_t2d_control)

###### HT YK

In [None]:
# EUR, Z_norm2, restricted to HT_YK cases (per file name).
all_tidy_eur_znorm2_score_clinical_ht_yk_case <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_znorm2_score_clinical_ht_yk_case)

In [None]:
# EUR, Z_norm2, restricted to HT_YK controls (per file name).
all_tidy_eur_znorm2_score_clinical_ht_yk_control <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_znorm2_score_clinical_ht_yk_control)

###### age

In [None]:
# EUR, Z_norm2, restricted to age < 60 (per file name).
all_tidy_eur_znorm2_score_clinical_age_less_60 <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.AGE_less_60.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_znorm2_score_clinical_age_less_60)

In [None]:
# EUR, Z_norm2, restricted to age >= 60 (per file name).
all_tidy_eur_znorm2_score_clinical_age_greater_equal_60 <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.AGE_greater_equal_60.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_znorm2_score_clinical_age_greater_equal_60)

##### 98th percentile

###### T2D

In [None]:
# EUR, 98th percentile, restricted to T2D cases (per file name).
all_tidy_eur_98th_score_clinical_t2d_case <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_98th_score_clinical_t2d_case)

In [None]:
# EUR, 98th percentile, restricted to T2D controls (per file name).
all_tidy_eur_98th_score_clinical_t2d_control <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_98th_score_clinical_t2d_control)

###### HT YK

In [None]:
# EUR, 98th percentile, restricted to HT_YK cases (per file name).
all_tidy_eur_98th_score_clinical_ht_yk_case <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_98th_score_clinical_ht_yk_case)

In [None]:
# EUR, 98th percentile, restricted to HT_YK controls (per file name).
all_tidy_eur_98th_score_clinical_ht_yk_control <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_98th_score_clinical_ht_yk_control)

###### age

In [None]:
# EUR, 98th percentile, restricted to age < 60 (per file name).
all_tidy_eur_98th_score_clinical_age_less_60 <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.AGE_less_60.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_98th_score_clinical_age_less_60)

In [None]:
# EUR, 98th percentile, restricted to age >= 60 (per file name).
all_tidy_eur_98th_score_clinical_age_greater_equal_60 <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.AGE_greater_equal_60.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_eur_98th_score_clinical_age_greater_equal_60)

### AFR

#### no clinical covariates

In [None]:
# AFR, continuous Z_norm2 model (per file name); dim() confirms the load.
all_tidy_afr_znorm2_score <- read.csv(
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_afr_znorm2_score)

In [None]:
# AFR, 98th-percentile model (per file name); dim() confirms the load.
all_tidy_afr_98th_score <- read.csv(
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
dim(all_tidy_afr_98th_score)

#### clinical covariates in model

##### znorm2

In [None]:
# Tidy glm score results (per the file name: AFR, Z_norm2 PGS, HT_YK and
# T2D clinical covariates), read as a tab-separated table.
all_tidy_afr_znorm2_score_clinical_ht_yk_t2d <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.clinical_covariates.HT_YK.T2D.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_znorm2_score_clinical_ht_yk_t2d)

In [None]:
# Tidy glm score results (per the file name: AFR, Z_norm2 PGS, T2D as the
# only clinical covariate), read as a tab-separated table.
all_tidy_afr_znorm2_score_clinical_t2d <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.clinical_covariates.T2D_only.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_znorm2_score_clinical_t2d)

In [None]:
# Tidy glm score results (per the file name: AFR, Z_norm2 PGS, HT_YK as
# the only clinical covariate), read as a tab-separated table.
all_tidy_afr_znorm2_score_clinical_ht_yk <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.clinical_covariates.HT_YK_only.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_znorm2_score_clinical_ht_yk)

##### 98th percentile

In [None]:
# Tidy glm score results (per the file name: AFR, 98th-percentile PGS
# classification, HT_YK and T2D clinical covariates), tab-separated.
all_tidy_afr_98th_score_clinical_ht_yk_t2d <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.clinical_covariates.HT_YK.T2D.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_98th_score_clinical_ht_yk_t2d)

In [None]:
# Tidy glm score results (per the file name: AFR, 98th-percentile PGS
# classification, T2D as the only clinical covariate), tab-separated.
all_tidy_afr_98th_score_clinical_t2d <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.clinical_covariates.T2D_only.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_98th_score_clinical_t2d)

In [None]:
# Tidy glm score results (per the file name: AFR, 98th-percentile PGS
# classification, HT_YK as the only clinical covariate), tab-separated.
all_tidy_afr_98th_score_clinical_ht_yk <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.clinical_covariates.HT_YK_only.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_98th_score_clinical_ht_yk)

#### stratified by clinical covariate cases and controls

##### znorm2

###### t2d

In [None]:
# Tidy glm score results (per the file name: AFR, T2D cases, Z_norm2 PGS),
# read as a tab-separated table.
all_tidy_afr_znorm2_score_clinical_t2d_case <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_znorm2_score_clinical_t2d_case)

In [None]:
# Tidy glm score results (per the file name: AFR, T2D controls, Z_norm2
# PGS), read as a tab-separated table.
all_tidy_afr_znorm2_score_clinical_t2d_control <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_znorm2_score_clinical_t2d_control)

###### ht yk

In [None]:
# Tidy glm score results (per the file name: AFR, HT_YK cases, Z_norm2
# PGS), read as a tab-separated table.
all_tidy_afr_znorm2_score_clinical_ht_yk_case <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_znorm2_score_clinical_ht_yk_case)

In [None]:
# Tidy glm score results (per the file name: AFR, HT_YK controls, Z_norm2
# PGS), read as a tab-separated table.
all_tidy_afr_znorm2_score_clinical_ht_yk_control <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_znorm2_score_clinical_ht_yk_control)

###### age

In [None]:
# Tidy glm score results (per the file name: AFR, age below 60, Z_norm2
# PGS), read as a tab-separated table.
all_tidy_afr_znorm2_score_clinical_age_less_60 <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.AGE_less_60.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_znorm2_score_clinical_age_less_60)

In [None]:
# Tidy glm score results (per the file name: AFR, age 60 or above, Z_norm2
# PGS), read as a tab-separated table.
all_tidy_afr_znorm2_score_clinical_age_greater_equal_60 <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.AGE_greater_equal_60.PGS_Znorm2.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_znorm2_score_clinical_age_greater_equal_60)

##### 98th percentile

###### t2d

In [None]:
# Tidy glm score results (per the file name: AFR, T2D cases,
# 98th-percentile PGS classification), read as a tab-separated table.
all_tidy_afr_98th_score_clinical_t2d_case <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_98th_score_clinical_t2d_case)

In [None]:
# Tidy glm score results (per the file name: AFR, T2D controls,
# 98th-percentile PGS classification), read as a tab-separated table.
all_tidy_afr_98th_score_clinical_t2d_control <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_98th_score_clinical_t2d_control)

###### ht yk

In [None]:
# Tidy glm score results (per the file name: AFR, HT_YK cases,
# 98th-percentile PGS classification), read as a tab-separated table.
all_tidy_afr_98th_score_clinical_ht_yk_case <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_98th_score_clinical_ht_yk_case)

In [None]:
# Tidy glm score results (per the file name: AFR, HT_YK controls,
# 98th-percentile PGS classification), read as a tab-separated table.
all_tidy_afr_98th_score_clinical_ht_yk_control <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_98th_score_clinical_ht_yk_control)

###### age

In [None]:
# Tidy glm score results (per the file name: AFR, age below 60,
# 98th-percentile PGS classification), read as a tab-separated table.
all_tidy_afr_98th_score_clinical_age_less_60 <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.AGE_less_60.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_98th_score_clinical_age_less_60)

In [None]:
# Tidy glm score results (per the file name: AFR, age 60 or above,
# 98th-percentile PGS classification), read as a tab-separated table.
all_tidy_afr_98th_score_clinical_age_greater_equal_60 <- read.csv(
  file = "AOU.AFR.CKD.YK_Phenotyping.AGE_greater_equal_60.PGS_Znorm2.98th_percentile.glm.score.tidy.txt",
  sep = "\t"
)
# Rows x columns, as a quick check of what was loaded.
dim(all_tidy_afr_98th_score_clinical_age_greater_equal_60)

## create forest plots - Znorm2 only

In [None]:
# Forest plot for the ALL group, Z_norm2 model: one point per SCORE at
# `estimate`, a horizontal bar from conf.low to conf.high, and a dashed
# reference line at 1 (the x axis is labelled as an odds ratio).
ggplot(
  data = all_tidy_znorm2_score,
  mapping = aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high)
) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  geom_errorbarh(position = position_dodge(width = 0.5), height = 0.2) +
  geom_vline(xintercept = 1, color = "#4C4E52", linetype = "dashed", linewidth = 0.75) +
  labs(
    title = "AOU ALL CKD PGS Z_norm2 Odds Ratio",
    x = "Odds Ratio (OR)",
    y = "Score in Model"
  ) +
  theme_minimal() +
  theme(legend.position = "top", text = element_text(size = 10))
# No plot is passed, so ggsave() writes the most recent plot (the one above).
ggsave(filename = "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.glm.odds_ratio.forestplot.png")

In [None]:
# Forest plot for the EUR group, Z_norm2 model: one point per SCORE at
# `estimate`, a horizontal bar from conf.low to conf.high, and a dashed
# reference line at 1 (the x axis is labelled as an odds ratio).
ggplot(
  data = all_tidy_eur_znorm2_score,
  mapping = aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high)
) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  geom_errorbarh(position = position_dodge(width = 0.5), height = 0.2) +
  geom_vline(xintercept = 1, color = "#4C4E52", linetype = "dashed", linewidth = 0.75) +
  labs(
    title = "AOU EUR CKD PGS Z_norm2 Odds Ratio",
    x = "Odds Ratio (OR)",
    y = "Score in Model"
  ) +
  theme_minimal() +
  theme(legend.position = "top", text = element_text(size = 10))
# No plot is passed, so ggsave() writes the most recent plot (the one above).
ggsave(filename = "AOU.EUR.CKD.YK_phenotyping.PGS_Znorm2.glm.odds_ratio.forestplot.png")

In [None]:
# Forest plot for the AFR group, Z_norm2 model: one point per SCORE at
# `estimate`, a horizontal bar from conf.low to conf.high, and a dashed
# reference line at 1 (the x axis is labelled as an odds ratio).
ggplot(
  data = all_tidy_afr_znorm2_score,
  mapping = aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high)
) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  geom_errorbarh(position = position_dodge(width = 0.5), height = 0.2) +
  geom_vline(xintercept = 1, color = "#4C4E52", linetype = "dashed", linewidth = 0.75) +
  labs(
    title = "AOU AFR CKD PGS Z_norm2 Odds Ratio",
    x = "Odds Ratio (OR)",
    y = "Score in Model"
  ) +
  theme_minimal() +
  theme(legend.position = "top", text = element_text(size = 10))
# No plot is passed, so ggsave() writes the most recent plot (the one above).
ggsave(filename = "AOU.AFR.CKD.YK_phenotyping.PGS_Znorm2.glm.odds_ratio.forestplot.png")

## create combined forest plots with Znorm2 + 98th percentile

In [None]:
# Combined forest plot for the ALL group: stack the Z_norm2 and
# 98th-percentile result tables, relabel the PRED_CASE term as
# "98th_percentile", and colour points/intervals by term.
rbind(all_tidy_znorm2_score, all_tidy_98th_score) %>%
  mutate(term = gsub("PRED_CASE", "98th_percentile", term)) %>%
  ggplot(
    mapping = aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high, color = term)
  ) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  geom_errorbarh(position = position_dodge(width = 0.5), height = 0.2) +
  geom_vline(xintercept = 1, color = "#4C4E52", linetype = "dashed", linewidth = 0.75) +
  labs(
    title = "AOU ALL CKD PGS Odds Ratio",
    x = "Odds Ratio (OR)",
    y = "Score in Model",
    color = "Case Classification Method"
  ) +
  theme_minimal() +
  # theme(legend.position = "top") +   (disabled: legend stays at its default position)
  theme(text = element_text(size = 10))
# No plot is passed, so ggsave() writes the most recent plot (the one above).
ggsave(filename = "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.98th_percentile.glm.odds_ratio.combined.forestplot.png")

In [None]:
# Combined forest plot for the EUR group: stack the Z_norm2 and
# 98th-percentile result tables, relabel the PRED_CASE term as
# "98th_percentile", and colour points/intervals by term.
rbind(all_tidy_eur_znorm2_score, all_tidy_eur_98th_score) %>%
  mutate(term = gsub("PRED_CASE", "98th_percentile", term)) %>%
  ggplot(
    mapping = aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high, color = term)
  ) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  geom_errorbarh(position = position_dodge(width = 0.5), height = 0.2) +
  geom_vline(xintercept = 1, color = "#4C4E52", linetype = "dashed", linewidth = 0.75) +
  labs(
    title = "AOU EUR CKD PGS Odds Ratio",
    x = "Odds Ratio (OR)",
    y = "Score in Model",
    color = "Case Classification Method"
  ) +
  theme_minimal() +
  # theme(legend.position = "top") +   (disabled: legend stays at its default position)
  theme(text = element_text(size = 10))
# No plot is passed, so ggsave() writes the most recent plot (the one above).
ggsave(filename = "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.98th_percentile.glm.odds_ratio.combined.forestplot.png")

In [None]:
# Combined forest plot for the AFR group: stack the Z_norm2 and
# 98th-percentile result tables, relabel the PRED_CASE term as
# "98th_percentile", and colour points/intervals by term.
rbind(all_tidy_afr_znorm2_score, all_tidy_afr_98th_score) %>%
  mutate(term = gsub("PRED_CASE", "98th_percentile", term)) %>%
  ggplot(
    mapping = aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high, color = term)
  ) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  geom_errorbarh(position = position_dodge(width = 0.5), height = 0.2) +
  geom_vline(xintercept = 1, color = "#4C4E52", linetype = "dashed", linewidth = 0.75) +
  labs(
    title = "AOU AFR CKD PGS Odds Ratio",
    x = "Odds Ratio (OR)",
    y = "Score in Model",
    color = "Case Classification Method"
  ) +
  theme_minimal() +
  # theme(legend.position = "top") +   (disabled: legend stays at its default position)
  theme(text = element_text(size = 10))
# No plot is passed, so ggsave() writes the most recent plot (the one above).
ggsave(filename = "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.98th_percentile.glm.odds_ratio.combined.forestplot.png")

## create forest plots with clinical covariates

### znorm2

#### all

In [None]:
# Forest plot comparing the Z_norm2 results of the ALL group across model
# variants. Each list entry pairs a legend label with one result table; the
# "Z_norm2" text in that table's `term` column is replaced by the label, the
# tables are stacked in the listed order, and the plot is coloured by term.
list(
  "No Clinical Covariates" = all_tidy_znorm2_score,
  "All Clinical Covariates" = all_tidy_znorm2_score_clinical_ht_yk_t2d,
  "T2D Covariate Only" = all_tidy_znorm2_score_clinical_t2d,
  "HT Covariate Only" = all_tidy_znorm2_score_clinical_ht_yk,
  "T2D Cases Only" = all_tidy_znorm2_score_clinical_t2d_case,
  "T2D Controls Only" = all_tidy_znorm2_score_clinical_t2d_control,
  "HT Cases Only" = all_tidy_znorm2_score_clinical_ht_yk_case,
  "HT Controls Only" = all_tidy_znorm2_score_clinical_ht_yk_control,
  "Age < 60" = all_tidy_znorm2_score_clinical_age_less_60,
  "Age >= 60" = all_tidy_znorm2_score_clinical_age_greater_equal_60
) %>%
  imap(function(tidy_df, model_label) {
    mutate(tidy_df, term = gsub("Z_norm2", model_label, term))
  }) %>%
  # drop the list names so rbind() does not prefix the row names with them
  unname() %>%
  do.call(what = rbind, args = .) %>%
  ggplot(
    mapping = aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high, color = term)
  ) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  geom_errorbarh(position = position_dodge(width = 0.5), height = 0.2) +
  geom_vline(xintercept = 1, color = "#4C4E52", linetype = "dashed", linewidth = 0.75) +
  labs(
    title = "AOU ALL CKD PGS Odds Ratio with Clinical Covariates",
    x = "Odds Ratio (OR)",
    y = "Score in Model",
    color = "Znorm2 Model"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 10))
# No plot is passed, so ggsave() writes the most recent plot (the one above).
ggsave(
  filename = "AOU.ALL.CKD.YK_Phenotyping.clinical_covariates.T2D.HT_YK.AGE.PGS_Znorm2.glm.odds_ratio.combined.forestplot.png",
  width = 12,
  height = 10
)

#### eur

In [None]:
# Forest plot comparing the Z_norm2 results of the EUR group across model
# variants. Each list entry pairs a legend label with one result table; the
# "Z_norm2" text in that table's `term` column is replaced by the label, the
# tables are stacked in the listed order, and the plot is coloured by term.
list(
  "No Clinical Covariates" = all_tidy_eur_znorm2_score,
  "All Clinical Covariates" = all_tidy_eur_znorm2_score_clinical_ht_yk_t2d,
  "T2D Covariate Only" = all_tidy_eur_znorm2_score_clinical_t2d,
  "HT Covariate Only" = all_tidy_eur_znorm2_score_clinical_ht_yk,
  "T2D Cases Only" = all_tidy_eur_znorm2_score_clinical_t2d_case,
  "T2D Controls Only" = all_tidy_eur_znorm2_score_clinical_t2d_control,
  "HT Cases Only" = all_tidy_eur_znorm2_score_clinical_ht_yk_case,
  "HT Controls Only" = all_tidy_eur_znorm2_score_clinical_ht_yk_control,
  "Age < 60" = all_tidy_eur_znorm2_score_clinical_age_less_60,
  "Age >= 60" = all_tidy_eur_znorm2_score_clinical_age_greater_equal_60
) %>%
  imap(function(tidy_df, model_label) {
    mutate(tidy_df, term = gsub("Z_norm2", model_label, term))
  }) %>%
  # drop the list names so rbind() does not prefix the row names with them
  unname() %>%
  do.call(what = rbind, args = .) %>%
  ggplot(
    mapping = aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high, color = term)
  ) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  geom_errorbarh(position = position_dodge(width = 0.5), height = 0.2) +
  geom_vline(xintercept = 1, color = "#4C4E52", linetype = "dashed", linewidth = 0.75) +
  labs(
    title = "AOU EUR CKD PGS Odds Ratio with Clinical Covariates",
    x = "Odds Ratio (OR)",
    y = "Score in Model",
    color = "Znorm2 Model"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 10))
# No plot is passed, so ggsave() writes the most recent plot (the one above).
ggsave(
  filename = "AOU.EUR.CKD.YK_Phenotyping.clinical_covariates.T2D.HT_YK.AGE.PGS_Znorm2.glm.odds_ratio.combined.forestplot.png",
  width = 12,
  height = 10
)

#### afr

In [None]:
# Forest plot comparing the Z_norm2 results of the AFR group across model
# variants. Each list entry pairs a legend label with one result table; the
# "Z_norm2" text in that table's `term` column is replaced by the label, the
# tables are stacked in the listed order, and the plot is coloured by term.
list(
  "No Clinical Covariates" = all_tidy_afr_znorm2_score,
  "All Clinical Covariates" = all_tidy_afr_znorm2_score_clinical_ht_yk_t2d,
  "T2D Covariate Only" = all_tidy_afr_znorm2_score_clinical_t2d,
  "HT Covariate Only" = all_tidy_afr_znorm2_score_clinical_ht_yk,
  "T2D Cases Only" = all_tidy_afr_znorm2_score_clinical_t2d_case,
  "T2D Controls Only" = all_tidy_afr_znorm2_score_clinical_t2d_control,
  "HT Cases Only" = all_tidy_afr_znorm2_score_clinical_ht_yk_case,
  "HT Controls Only" = all_tidy_afr_znorm2_score_clinical_ht_yk_control,
  "Age < 60" = all_tidy_afr_znorm2_score_clinical_age_less_60,
  "Age >= 60" = all_tidy_afr_znorm2_score_clinical_age_greater_equal_60
) %>%
  imap(function(tidy_df, model_label) {
    mutate(tidy_df, term = gsub("Z_norm2", model_label, term))
  }) %>%
  # drop the list names so rbind() does not prefix the row names with them
  unname() %>%
  do.call(what = rbind, args = .) %>%
  ggplot(
    mapping = aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high, color = term)
  ) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  geom_errorbarh(position = position_dodge(width = 0.5), height = 0.2) +
  geom_vline(xintercept = 1, color = "#4C4E52", linetype = "dashed", linewidth = 0.75) +
  labs(
    title = "AOU AFR CKD PGS Odds Ratio with Clinical Covariates",
    x = "Odds Ratio (OR)",
    y = "Score in Model",
    color = "Znorm2 Model"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 10))
# No plot is passed, so ggsave() writes the most recent plot (the one above).
ggsave(
  filename = "AOU.AFR.CKD.YK_Phenotyping.clinical_covariates.T2D.HT_YK.AGE.PGS_Znorm2.glm.odds_ratio.combined.forestplot.png",
  width = 12,
  height = 10
)

### 98th percentile

#### all

In [None]:
# Forest plot comparing the 98th-percentile results of the ALL group across
# model variants. Each list entry pairs a legend label with one result table;
# the "PRED_CASE" text in that table's `term` column is replaced by the label,
# the tables are stacked in the listed order, and the plot is coloured by term.
list(
  "No Clinical Covariates" = all_tidy_98th_score,
  "All Clinical Covariates" = all_tidy_98th_score_clinical_ht_yk_t2d,
  "T2D Covariate Only" = all_tidy_98th_score_clinical_t2d,
  "HT Covariate Only" = all_tidy_98th_score_clinical_ht_yk,
  "T2D Cases Only" = all_tidy_98th_score_clinical_t2d_case,
  "T2D Controls Only" = all_tidy_98th_score_clinical_t2d_control,
  "HT Cases Only" = all_tidy_98th_score_clinical_ht_yk_case,
  "HT Controls Only" = all_tidy_98th_score_clinical_ht_yk_control,
  "Age < 60" = all_tidy_98th_score_clinical_age_less_60,
  "Age >= 60" = all_tidy_98th_score_clinical_age_greater_equal_60
) %>%
  imap(function(tidy_df, model_label) {
    mutate(tidy_df, term = gsub("PRED_CASE", model_label, term))
  }) %>%
  # drop the list names so rbind() does not prefix the row names with them
  unname() %>%
  do.call(what = rbind, args = .) %>%
  ggplot(
    mapping = aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high, color = term)
  ) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  geom_errorbarh(position = position_dodge(width = 0.5), height = 0.2) +
  geom_vline(xintercept = 1, color = "#4C4E52", linetype = "dashed", linewidth = 0.75) +
  labs(
    title = "AOU ALL CKD PGS Odds Ratio with Clinical Covariates",
    x = "Odds Ratio (OR)",
    y = "Score in Model",
    color = "98th Percentile Model"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 10))
# No plot is passed, so ggsave() writes the most recent plot (the one above).
ggsave(
  filename = "AOU.ALL.CKD.YK_Phenotyping.clinical_covariates.T2D.HT_YK.AGE.PGS_Znorm2.98th_percentile.glm.odds_ratio.combined.forestplot.png",
  width = 12,
  height = 10
)

#### eur

In [None]:
# Forest plot comparing the 98th-percentile results of the EUR group across
# model variants. Each list entry pairs a legend label with one result table;
# the "PRED_CASE" text in that table's `term` column is replaced by the label,
# the tables are stacked in the listed order, and the plot is coloured by term.
list(
  "No Clinical Covariates" = all_tidy_eur_98th_score,
  "All Clinical Covariates" = all_tidy_eur_98th_score_clinical_ht_yk_t2d,
  "T2D Covariate Only" = all_tidy_eur_98th_score_clinical_t2d,
  "HT Covariate Only" = all_tidy_eur_98th_score_clinical_ht_yk,
  "T2D Cases Only" = all_tidy_eur_98th_score_clinical_t2d_case,
  "T2D Controls Only" = all_tidy_eur_98th_score_clinical_t2d_control,
  "HT Cases Only" = all_tidy_eur_98th_score_clinical_ht_yk_case,
  "HT Controls Only" = all_tidy_eur_98th_score_clinical_ht_yk_control,
  "Age < 60" = all_tidy_eur_98th_score_clinical_age_less_60,
  "Age >= 60" = all_tidy_eur_98th_score_clinical_age_greater_equal_60
) %>%
  imap(function(tidy_df, model_label) {
    mutate(tidy_df, term = gsub("PRED_CASE", model_label, term))
  }) %>%
  # drop the list names so rbind() does not prefix the row names with them
  unname() %>%
  do.call(what = rbind, args = .) %>%
  ggplot(
    mapping = aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high, color = term)
  ) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  geom_errorbarh(position = position_dodge(width = 0.5), height = 0.2) +
  geom_vline(xintercept = 1, color = "#4C4E52", linetype = "dashed", linewidth = 0.75) +
  labs(
    title = "AOU EUR CKD PGS Odds Ratio with Clinical Covariates",
    x = "Odds Ratio (OR)",
    y = "Score in Model",
    color = "98th Percentile Model"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 10))
# No plot is passed, so ggsave() writes the most recent plot (the one above).
ggsave(
  filename = "AOU.EUR.CKD.YK_Phenotyping.clinical_covariates.T2D.HT_YK.AGE.PGS_Znorm2.98th_percentile.glm.odds_ratio.combined.forestplot.png",
  width = 12,
  height = 10
)

#### afr

In [None]:
# Forest plot comparing the 98th-percentile results of the AFR group across
# model variants. Each list entry pairs a legend label with one result table;
# the "PRED_CASE" text in that table's `term` column is replaced by the label,
# the tables are stacked in the listed order, and the plot is coloured by term.
#
# The "HT Controls Only" entry was missing from this AFR plot although the
# table is loaded and the matching ALL and EUR plots include it; it is added
# here so all three plots show the same ten model variants.
list(
  "No Clinical Covariates" = all_tidy_afr_98th_score,
  "All Clinical Covariates" = all_tidy_afr_98th_score_clinical_ht_yk_t2d,
  "T2D Covariate Only" = all_tidy_afr_98th_score_clinical_t2d,
  "HT Covariate Only" = all_tidy_afr_98th_score_clinical_ht_yk,
  "T2D Cases Only" = all_tidy_afr_98th_score_clinical_t2d_case,
  "T2D Controls Only" = all_tidy_afr_98th_score_clinical_t2d_control,
  "HT Cases Only" = all_tidy_afr_98th_score_clinical_ht_yk_case,
  "HT Controls Only" = all_tidy_afr_98th_score_clinical_ht_yk_control,
  "Age < 60" = all_tidy_afr_98th_score_clinical_age_less_60,
  "Age >= 60" = all_tidy_afr_98th_score_clinical_age_greater_equal_60
) %>%
  imap(function(tidy_df, model_label) {
    mutate(tidy_df, term = gsub("PRED_CASE", model_label, term))
  }) %>%
  # drop the list names so rbind() does not prefix the row names with them
  unname() %>%
  do.call(what = rbind, args = .) %>%
  ggplot(
    mapping = aes(x = estimate, y = SCORE, xmin = conf.low, xmax = conf.high, color = term)
  ) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  geom_errorbarh(position = position_dodge(width = 0.5), height = 0.2) +
  geom_vline(xintercept = 1, color = "#4C4E52", linetype = "dashed", linewidth = 0.75) +
  labs(
    title = "AOU AFR CKD PGS Odds Ratio with Clinical Covariates",
    x = "Odds Ratio (OR)",
    y = "Score in Model",
    color = "98th Percentile Model"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 10))
# No plot is passed, so ggsave() writes the most recent plot (the one above).
ggsave(
  filename = "AOU.AFR.CKD.YK_Phenotyping.clinical_covariates.T2D.HT_YK.AGE.PGS_Znorm2.98th_percentile.glm.odds_ratio.combined.forestplot.png",
  width = 12,
  height = 10
)

## assess significance

In [None]:
# ALL group, Z_norm2 model: list the results with a p-value above 0.05 or an
# estimate below 1, one row per distinct SCORE / estimate / p.value.
distinct(
  filter(all_tidy_znorm2_score, p.value > 0.05 | estimate < 1),
  SCORE, estimate, p.value
)

In [None]:
# EUR group, Z_norm2 model: list the results with a p-value above 0.05 or an
# estimate below 1, one row per distinct SCORE / estimate / p.value.
distinct(
  filter(all_tidy_eur_znorm2_score, p.value > 0.05 | estimate < 1),
  SCORE, estimate, p.value
)

In [None]:
# AFR group, Z_norm2 model: list the results with a p-value above 0.05 or an
# estimate below 1, one row per distinct SCORE / estimate / p.value.
distinct(
  filter(all_tidy_afr_znorm2_score, p.value > 0.05 | estimate < 1),
  SCORE, estimate, p.value
)

# compute AUROC & brier scores
- add clinical covariates (T2D and/or HT) by adding them as variables in the regression model
- filter regressions by clinical covariate case control status (T2D or HT) or age binning (less than or greater than 60) by filtering the "one score" dataframe

## make score lists
- remove scores with insignificant association and/or negative effect size
- multiancestry list: only EUR and multiancestry PGS

In [None]:
# Scores to model: every distinct PGS in all_score except the six listed
# below (the section notes say scores with non-significant association and/or
# negative effect size are removed).
score_list <- unique(all_score[["PGS"]])
score_list <- score_list[!(score_list %in% c(
  "PGS002757",
  "PGS005090",
  "AFR.eGFR.PRScsx",
  "AMR.eGFR.PRScsx",
  "EAS.eGFR.PRScsx",
  "EUR.eGFR.PRScsx"
))]
# Quick checks: how many scores remain, which ones, and the vector's class.
length(score_list)
print(score_list)
class(score_list)

In [None]:
# Scores for the multiancestry runs: the same six exclusions as score_list,
# plus the ancestry-specific AFR/AMR/EAS eGFR.flip scores and the AFR/EAS
# Phe_585.3 scores (the section notes describe this list as "only EUR and
# multiancestry PGS").
# NOTE(review): AMR.Phe_585.3.PRScsx is not excluded here - confirm whether
# such a score exists in all_score and, if so, whether it should be dropped.
score_list_multiancestry <- unique(all_score[["PGS"]])
score_list_multiancestry <- score_list_multiancestry[!(score_list_multiancestry %in% c(
  "PGS002757",
  "PGS005090",
  "AFR.eGFR.PRScsx",
  "AMR.eGFR.PRScsx",
  "EAS.eGFR.PRScsx",
  "EUR.eGFR.PRScsx",
  "AFR.eGFR.flip.PRScsx",
  "AMR.eGFR.flip.PRScsx",
  "EAS.eGFR.flip.PRScsx",
  "AFR.Phe_585.3.PRScsx",
  "EAS.Phe_585.3.PRScsx"
))]
# Quick checks: how many scores remain, which ones, and the vector's class.
length(score_list_multiancestry)
print(score_list_multiancestry)
class(score_list_multiancestry)

## run models on all individuals and make discrimination & calibration plots

In [None]:
# Cross-validated logistic regression of CKD on Z_norm2 + AGE + SEX, fitted
# separately for each PGS in score_list on the rows of all_score. For every
# score the loop stores the resampling metrics, the resampling result wrapped
# as a workflow set, the ROC curve data and a calibration plot.

# set glm model
message('setting parameters')
glm_model <- logistic_reg() %>%
    set_engine("glm")

# metrics computed on the held-out folds: AUROC and Brier score
cls_met <- metric_set(roc_auc, brier_class)
# keep the out-of-fold predictions and the workflow with the resampling result
control <- control_resamples(save_pred = TRUE, save_workflow = TRUE)

# per-score outputs, keyed by score name
model_metrics_dfs <- list()
workflow_dfs <- list()
roc_plot_list <- list()
cal_plot_list <- list()

# loop through scores
message('starting for loop')
for (score in score_list) {
    index <- which(score_list == score)
    message(paste0('starting ', score, ' at index ', index))

    # reset the seed before the folds are drawn for each score
    set.seed(7)

    # Keep only this score's rows. Exact equality replaces grepl(): score names
    # contain "." (a regex wildcard) and grepl() also matches substrings, so a
    # pattern could silently pull in rows that belong to a different score.
    # `.env$score` makes sure the loop variable is used even if the data has a
    # column called `score`.
    scores_vfold <- all_score %>%
        filter(PGS == .env$score)

    # 10-fold cross-validation repeated 6 times, no stratification
    folds_x <- vfold_cv(scores_vfold, v = 10, repeats = 6, strata = NULL, pool = 0.1)

    # build workflow
    glm_workflow <- workflow() %>%
        add_model(glm_model) %>%
        add_variables(outcome = CKD, predictors = c(Z_norm2, AGE, SEX)) # Remove Age and Sex if looking to test null model with just outcome ~ PGS

    # run model with cross validation
    glm_fit_all <- fit_resamples(glm_workflow, folds_x, metrics = cls_met, control = control)

    # collect metrics, tagged with the score they belong to
    metrics <- data.frame(collect_metrics(glm_fit_all)) %>%
        dplyr::mutate(predictor = score)
    model_metrics_dfs[[score]] <- metrics

    # collect the held-out predictions
    preds <- collect_predictions(glm_fit_all)

    # wrap the resampling result as a workflow set named after the score
    # (this overwrites the workflow object previously held in glm_workflow)
    glm_workflow <- as_workflow_set(!!score := glm_fit_all)
    workflow_dfs[[score]] <- glm_workflow

    # ROC curve data, treating the second level of CKD as the event (.pred_1)
    roc_data <- preds %>%
        roc_curve(CKD, .pred_1, event_level = "second") %>%
        dplyr::mutate(predictor = score)
    roc_plot_list[[score]] <- roc_data

    # calibration plot
    # NOTE(review): the ROC curve above scores .pred_1 while this plot is
    # drawn from .pred_0 - confirm that calibrating .pred_0 is intended.
    cal_plot <- preds %>%
        cal_plot_logistic(CKD, .pred_0, smooth = FALSE, conf_level = 0.95, include_rug = FALSE) +
        ggtitle(paste0('AOU ALL CKD ', score, ' Calibration'))
    cal_plot_list[[score]] <- cal_plot
}


## run models on all individuals with multiancestry scores

In [None]:
# Cross-validated logistic regression of CKD on Z_norm2 + AGE + SEX, fitted
# separately for each PGS in score_list_multiancestry on the rows of
# all_score. Only the resampling result (as a workflow set) is kept per score.

# set glm model
message('setting parameters')
glm_model <- logistic_reg() %>%
    set_engine("glm")

# metrics computed on the held-out folds: AUROC and Brier score
cls_met <- metric_set(roc_auc, brier_class)
# keep the out-of-fold predictions and the workflow with the resampling result
control <- control_resamples(save_pred = TRUE, save_workflow = TRUE)

# per-score workflow sets, keyed by score name
workflow_dfs_multiancestry_score <- list()

# loop through scores
message('starting for loop')
for (score in score_list_multiancestry) {
    index <- which(score_list_multiancestry == score)
    message(paste0('starting ', score, ' at index ', index))

    # reset the seed before the folds are drawn for each score
    set.seed(7)

    # Keep only this score's rows. Exact equality replaces grepl(): score names
    # contain "." (a regex wildcard) and grepl() also matches substrings, so a
    # pattern could silently pull in rows that belong to a different score.
    # `.env$score` makes sure the loop variable is used even if the data has a
    # column called `score`.
    scores_vfold <- all_score %>%
        filter(PGS == .env$score)

    # 10-fold cross-validation repeated 6 times, no stratification
    folds_x <- vfold_cv(scores_vfold, v = 10, repeats = 6, strata = NULL, pool = 0.1)

    # build workflow
    glm_workflow <- workflow() %>%
        add_model(glm_model) %>%
        add_variables(outcome = CKD, predictors = c(Z_norm2, AGE, SEX)) # Remove Age and Sex if looking to test null model with just outcome ~ PGS

    # run model with cross validation
    glm_fit_all <- fit_resamples(glm_workflow, folds_x, metrics = cls_met, control = control)

    # wrap the resampling result as a workflow set named after the score
    # (this overwrites the workflow object previously held in glm_workflow)
    glm_workflow <- as_workflow_set(!!score := glm_fit_all)
    workflow_dfs_multiancestry_score[[score]] <- glm_workflow
}

## run models on EUR individuals and make discrimination/calibration plots

In [None]:
# Cross-validated logistic regression of CKD on Z_norm2 + AGE + SEX, fitted
# separately for each PGS in score_list on the rows of all_score_eur. For
# every score the loop stores the resampling metrics, the resampling result
# wrapped as a workflow set, the ROC curve data and a calibration plot.

# set glm model
message('setting parameters')
glm_model <- logistic_reg() %>%
    set_engine("glm")

# metrics computed on the held-out folds: AUROC and Brier score
cls_met <- metric_set(roc_auc, brier_class)
# keep the out-of-fold predictions and the workflow with the resampling result
control <- control_resamples(save_pred = TRUE, save_workflow = TRUE)

# per-score outputs, keyed by score name
model_metrics_eur_dfs <- list()
workflow_eur_dfs <- list()
roc_plot_list_eur <- list()
cal_plot_list_eur <- list()

# loop through scores
message('starting for loop')
for (score in score_list) {
    index <- which(score_list == score)
    message(paste0('starting ', score, ' at index ', index))

    # reset the seed before the folds are drawn for each score
    set.seed(7)

    # Keep only this score's rows. Exact equality replaces grepl(): score names
    # contain "." (a regex wildcard) and grepl() also matches substrings, so a
    # pattern could silently pull in rows that belong to a different score.
    # `.env$score` makes sure the loop variable is used even if the data has a
    # column called `score`.
    scores_vfold <- all_score_eur %>%
        filter(PGS == .env$score)

    # 10-fold cross-validation repeated 6 times, no stratification
    folds_x <- vfold_cv(scores_vfold, v = 10, repeats = 6, strata = NULL, pool = 0.1)

    # build workflow
    glm_workflow <- workflow() %>%
        add_model(glm_model) %>%
        add_variables(outcome = CKD, predictors = c(Z_norm2, AGE, SEX)) # Remove Age and Sex if looking to test null model with just outcome ~ PGS

    # run model with cross validation
    glm_fit_all <- fit_resamples(glm_workflow, folds_x, metrics = cls_met, control = control)

    # collect metrics, tagged with the score they belong to
    metrics <- data.frame(collect_metrics(glm_fit_all)) %>%
        dplyr::mutate(predictor = score)
    model_metrics_eur_dfs[[score]] <- metrics

    # collect the held-out predictions
    preds <- collect_predictions(glm_fit_all)

    # wrap the resampling result as a workflow set named after the score
    # (this overwrites the workflow object previously held in glm_workflow)
    glm_workflow <- as_workflow_set(!!score := glm_fit_all)
    workflow_eur_dfs[[score]] <- glm_workflow

    # ROC curve data, treating the second level of CKD as the event (.pred_1)
    roc_data <- preds %>%
        roc_curve(CKD, .pred_1, event_level = "second") %>%
        dplyr::mutate(predictor = score)
    roc_plot_list_eur[[score]] <- roc_data

    # calibration plot
    # NOTE(review): the ROC curve above scores .pred_1 while this plot is
    # drawn from .pred_0 - confirm that calibrating .pred_0 is intended.
    cal_plot <- preds %>%
        cal_plot_logistic(CKD, .pred_0, smooth = FALSE, conf_level = 0.95, include_rug = FALSE) +
        ggtitle(paste('AOU EUR CKD', score, 'Calibration'))
    cal_plot_list_eur[[score]] <- cal_plot
}


## run models on EUR individuals with multiancestry scores

In [None]:
# Cross-validated logistic regression of CKD on Z_norm2 + AGE + SEX + HT_YK,
# fitted separately for each PGS in score_list_multiancestry on the rows of
# all_score_eur. Only the resampling result (as a workflow set) is kept per
# score.

# set glm model
message('setting parameters')
glm_model <- logistic_reg() %>%
    set_engine("glm")

# metrics computed on the held-out folds: AUROC and Brier score
cls_met <- metric_set(roc_auc, brier_class)
# keep the out-of-fold predictions and the workflow with the resampling result
control <- control_resamples(save_pred = TRUE, save_workflow = TRUE)

# per-score workflow sets, keyed by score name
workflow_eur_multiancestry_score_dfs <- list()

# loop through scores
message('starting for loop')
for (score in score_list_multiancestry) {
    index <- which(score_list_multiancestry == score)
    message(paste0('starting ', score, ' at index ', index))

    # reset the seed before the folds are drawn for each score
    set.seed(7)

    # Keep only this score's rows. Exact equality replaces grepl(): score names
    # contain "." (a regex wildcard) and grepl() also matches substrings, so a
    # pattern could silently pull in rows that belong to a different score.
    # `.env$score` makes sure the loop variable is used even if the data has a
    # column called `score`.
    scores_vfold <- all_score_eur %>%
        filter(PGS == .env$score)

    # 10-fold cross-validation repeated 6 times, no stratification
    folds_x <- vfold_cv(scores_vfold, v = 10, repeats = 6, strata = NULL, pool = 0.1)

    # build workflow
    # NOTE(review): HT_YK is a predictor here, whereas the all-individuals
    # multiancestry run uses only Z_norm2, AGE and SEX - confirm this
    # difference is intended before comparing the resulting workflow sets.
    glm_workflow <- workflow() %>%
        add_model(glm_model) %>%
        add_variables(outcome = CKD, predictors = c(Z_norm2, AGE, SEX, HT_YK)) # Remove Age and Sex if looking to test null model with just outcome ~ PGS

    # run model with cross validation
    glm_fit_all <- fit_resamples(glm_workflow, folds_x, metrics = cls_met, control = control)

    # wrap the resampling result as a workflow set named after the score
    # (this overwrites the workflow object previously held in glm_workflow)
    glm_workflow <- as_workflow_set(!!score := glm_fit_all)
    workflow_eur_multiancestry_score_dfs[[score]] <- glm_workflow
}

## run models on AFR individuals and make discrimination/calibration plots

In [None]:
# Cross-validated logistic regression of CKD on Z_norm2 + AGE + SEX, fitted
# separately for each PGS in score_list on the rows of all_score_afr. For
# every score the loop stores the resampling metrics, the resampling result
# wrapped as a workflow set, the ROC curve data and a calibration plot.

# set glm model
message('setting parameters')
glm_model <- logistic_reg() %>%
    set_engine("glm")

# metrics computed on the held-out folds: AUROC and Brier score
cls_met <- metric_set(roc_auc, brier_class)
# keep the out-of-fold predictions and the workflow with the resampling result
control <- control_resamples(save_pred = TRUE, save_workflow = TRUE)

# per-score outputs, keyed by score name
model_metrics_afr_dfs <- list()
workflow_afr_dfs <- list()
roc_plot_list_afr <- list()
cal_plot_list_afr <- list()

# loop through scores
message('starting for loop')
for (score in score_list) {
    index <- which(score_list == score)
    message(paste0('starting ', score, ' at index ', index))

    # reset the seed before the folds are drawn for each score
    set.seed(7)

    # Keep only this score's rows. Exact equality replaces grepl(): score names
    # contain "." (a regex wildcard) and grepl() also matches substrings, so a
    # pattern could silently pull in rows that belong to a different score.
    # `.env$score` makes sure the loop variable is used even if the data has a
    # column called `score`.
    scores_vfold <- all_score_afr %>%
        filter(PGS == .env$score)

    # 10-fold cross-validation repeated 6 times, no stratification
    folds_x <- vfold_cv(scores_vfold, v = 10, repeats = 6, strata = NULL, pool = 0.1)

    # build workflow
    glm_workflow <- workflow() %>%
        add_model(glm_model) %>%
        add_variables(outcome = CKD, predictors = c(Z_norm2, AGE, SEX)) # Remove Age and Sex if looking to test null model with just outcome ~ PGS

    # run model with cross validation
    glm_fit_all <- fit_resamples(glm_workflow, folds_x, metrics = cls_met, control = control)

    # collect metrics, tagged with the score they belong to
    metrics <- data.frame(collect_metrics(glm_fit_all)) %>%
        dplyr::mutate(predictor = score)
    model_metrics_afr_dfs[[score]] <- metrics

    # collect the held-out predictions
    preds <- collect_predictions(glm_fit_all)

    # wrap the resampling result as a workflow set named after the score
    # (this overwrites the workflow object previously held in glm_workflow)
    glm_workflow <- as_workflow_set(!!score := glm_fit_all)
    workflow_afr_dfs[[score]] <- glm_workflow

    # ROC curve data, treating the second level of CKD as the event (.pred_1)
    roc_data <- preds %>%
        roc_curve(CKD, .pred_1, event_level = "second") %>%
        dplyr::mutate(predictor = score)
    roc_plot_list_afr[[score]] <- roc_data

    # calibration plot
    # NOTE(review): the ROC curve above scores .pred_1 while this plot is
    # drawn from .pred_0 - confirm that calibrating .pred_0 is intended.
    cal_plot <- preds %>%
        cal_plot_logistic(CKD, .pred_0, smooth = FALSE, conf_level = 0.95, include_rug = FALSE) +
        ggtitle(paste('AOU AFR CKD', score, 'Calibration'))
    cal_plot_list_afr[[score]] <- cal_plot
}


## run models on AFR individuals with multiancestry scores

In [None]:
# Cross-validated logistic regression of CKD on each score in
# `score_list_multiancestry` (plus AGE, SEX and HT_YK) in `all_score_afr`.
# Only the resampling results are kept here, one workflow set per score.

# set glm model
message('setting parameters')
glm_model <- logistic_reg() %>%
    set_engine("glm")

# define metrics of interest: AUROC and Brier score
cls_met <- metric_set(roc_auc, brier_class)

# keep out-of-fold predictions and the workflow alongside the metrics
control <- control_resamples(save_pred = TRUE, save_workflow = TRUE)

# per-score workflow sets, keyed by score ID
workflow_afr_multiancestry_score_dfs <- list()

# loop through scores
message('starting for loop')
for (score in score_list_multiancestry) {
    index <- which(score_list_multiancestry == score)
    message(paste0('starting ', score, ' at index ', index))

    # reset the seed so the fold assignment is reproducible for every score
    set.seed(7)

    # keep only the rows of this score. The ID is matched as a literal
    # substring (fixed = TRUE): score IDs contain '.', which would otherwise
    # act as a regex wildcard and could pull in rows of other scores.
    scores_vfold <- all_score_afr %>%
        filter(grepl(score, PGS, fixed = TRUE))

    # 10-fold cross validation repeated 6 times, no stratification
    folds_x <- vfold_cv(scores_vfold, v = 10, repeats = 6, strata = NULL, pool = 0.1)

    # build workflow
    # Remove the covariates to test the null model with just outcome ~ PGS
    glm_workflow <- workflow() %>%
        add_model(glm_model) %>%
        add_variables(outcome = CKD, predictors = c(Z_norm2, AGE, SEX, HT_YK))

    # run model with cross validation
    glm_fit_all <- fit_resamples(glm_workflow, folds_x, metrics = cls_met, control = control)

    # wrap the resampling result as a one-row workflow set named after the score
    glm_workflow <- as_workflow_set(!!score := glm_fit_all)
    workflow_afr_multiancestry_score_dfs[[score]] <- glm_workflow
}

## concatenate

### all

In [None]:
# Row-bind the per-score entries of `workflow_dfs` (all individuals) into one
# object, then report how many rows it has and print it.
workflows <- workflow_dfs %>%
    bind_rows()
nrow(workflows)
print(workflows)

In [None]:
# Long-format metrics for all individuals: one row per predictor and metric,
# keeping the mean across resamples as `value`.
model_metrics <- model_metrics_dfs %>%
    bind_rows() %>%
    select(predictor, metric = .metric, value = mean)
nrow(model_metrics)
head(model_metrics)

In [None]:
# Stack the per-score ROC curve data (all individuals) into one data frame
roc_plot_data <- roc_plot_list %>%
    bind_rows()
nrow(roc_plot_data)
head(roc_plot_data)

### all individuals multiancestry scores

In [None]:
# Row-bind the per-score entries of `workflow_dfs_multiancestry_score`
# (all individuals, multiancestry scores) into one object.
workflows_multiancestry_score <- workflow_dfs_multiancestry_score %>%
    bind_rows()
nrow(workflows_multiancestry_score)
print(workflows_multiancestry_score)

### eur

In [None]:
# Row-bind the per-score entries of `workflow_eur_dfs` (EUR individuals)
workflows_eur <- workflow_eur_dfs %>%
    bind_rows()
nrow(workflows_eur)
print(workflows_eur)

In [None]:
# Long-format metrics for EUR individuals: one row per predictor and metric,
# keeping the mean across resamples as `value`.
model_metrics_eur <- model_metrics_eur_dfs %>%
    bind_rows() %>%
    select(predictor, metric = .metric, value = mean)
nrow(model_metrics_eur)
head(model_metrics_eur)

In [None]:
# Stack the per-score ROC curve data (EUR individuals) into one data frame
roc_plot_data_eur <- roc_plot_list_eur %>%
    bind_rows()
nrow(roc_plot_data_eur)
head(roc_plot_data_eur)

### eur individuals multiancestry scores

In [None]:
# Row-bind the per-score workflow sets built for EUR individuals with the
# multiancestry scores.
workflows_eur_multiancestry_score <- workflow_eur_multiancestry_score_dfs %>%
    bind_rows()
nrow(workflows_eur_multiancestry_score)
print(workflows_eur_multiancestry_score)

### afr

In [None]:
# Row-bind the per-score workflow sets built for AFR individuals
workflows_afr <- workflow_afr_dfs %>%
    bind_rows()
nrow(workflows_afr)
print(workflows_afr)

In [None]:
# Long-format metrics for AFR individuals: one row per predictor and metric,
# keeping the mean across resamples as `value`.
model_metrics_afr <- model_metrics_afr_dfs %>%
    bind_rows() %>%
    select(predictor, metric = .metric, value = mean)
nrow(model_metrics_afr)
head(model_metrics_afr)

In [None]:
# Stack the per-score ROC curve data (AFR individuals) into one data frame
roc_plot_data_afr <- roc_plot_list_afr %>%
    bind_rows()
nrow(roc_plot_data_afr)
head(roc_plot_data_afr)

### afr individuals multiancestry scores

In [None]:
# Row-bind the per-score workflow sets built for AFR individuals with the
# multiancestry scores.
workflows_afr_multiancestry_score <- workflow_afr_multiancestry_score_dfs %>%
    bind_rows()
nrow(workflows_afr_multiancestry_score)
print(workflows_afr_multiancestry_score)

## make/show & export plots

### AUROC

In [None]:
# ROC curves for all individuals, one coloured line per predictor, with the
# dashed identity line as the chance reference. ggsave() writes the plot that
# was just built (its default is the last plot).
ggplot(
    data = roc_plot_data,
    mapping = aes(x = 1 - specificity, y = sensitivity, color = predictor)
) +
    geom_line(linewidth = 1.2) +
    geom_abline(linetype = "dashed") +
    theme_minimal() +
    labs(
        title = "AOU ALL CKD PGS ROC Curves",
        x = "1 - Specificity",
        y = "Sensitivity"
    )
ggsave(filename = "AOU.ALL.CKD.YK_Phenotyping.clinical_covariates.PGS_Znorm2.cv_glm.roc_curve.png")

In [None]:
# ROC curves for EUR individuals, one coloured line per predictor, with the
# dashed identity line as the chance reference. ggsave() writes the plot that
# was just built (its default is the last plot).
ggplot(
    data = roc_plot_data_eur,
    mapping = aes(x = 1 - specificity, y = sensitivity, color = predictor)
) +
    geom_line(linewidth = 1.2) +
    geom_abline(linetype = "dashed") +
    theme_minimal() +
    labs(
        title = "AOU EUR CKD PGS ROC Curves",
        x = "1 - Specificity",
        y = "Sensitivity"
    )
ggsave(filename = "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.roc_curve.png")

In [None]:
# ROC curves for AFR individuals, one coloured line per predictor, with the
# dashed identity line as the chance reference. ggsave() writes the plot that
# was just built (its default is the last plot).
ggplot(
    data = roc_plot_data_afr,
    mapping = aes(x = 1 - specificity, y = sensitivity, color = predictor)
) +
    geom_line(linewidth = 1.2) +
    geom_abline(linetype = "dashed") +
    theme_minimal() +
    labs(
        title = "AOU AFR CKD PGS ROC Curves",
        x = "1 - Specificity",
        y = "Sensitivity"
    )
ggsave(filename = "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.roc_curve.png")

### Calibration

#### show

In [None]:
# Display every calibration plot stored in `cal_plot_list`
cal_plot_list

#### export as RDS to optimize storage

In [None]:
# Apply drop_vars() to every calibration plot and save the resulting list.
# The assignment and the save are separate statements: piping straight into
# saveRDS() would leave `plot_list_cal` holding saveRDS()'s NULL return value.
# NOTE(review): the file name spells "clincial"; it is kept unchanged so that
# existing paths to this file keep working -- rename everywhere at once if desired.
plot_list_cal <- lapply(cal_plot_list, drop_vars)
saveRDS(plot_list_cal, file = "AOU.ALL.CKD.YK_Phenotyping.clincial_covariates.PGS_Znorm2.cv_glm.calibration_plot.rds")

In [None]:
# Apply drop_vars() to every EUR calibration plot and save the resulting list.
# The assignment and the save are separate statements: piping straight into
# saveRDS() would leave `plot_list_cal_eur` holding saveRDS()'s NULL return value.
plot_list_cal_eur <- lapply(cal_plot_list_eur, drop_vars)
saveRDS(plot_list_cal_eur, file = "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.calibration_plot.rds")

In [None]:
# Apply drop_vars() to every AFR calibration plot and save the resulting list.
# The assignment and the save are separate statements: piping straight into
# saveRDS() would leave `plot_list_cal_afr` holding saveRDS()'s NULL return value.
plot_list_cal_afr <- lapply(cal_plot_list_afr, drop_vars)
saveRDS(plot_list_cal_afr, file = "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.calibration_plot.rds")

#### export one plot

In [None]:
# Reload the calibration plots for all individuals from the RDS written in the
# "export as RDS" step. The path must match that saveRDS() call exactly
# (including its "clincial" spelling); the previous path pointed at a file
# that this notebook does not write.
cal_plot_list <- readRDS("AOU.ALL.CKD.YK_Phenotyping.clincial_covariates.PGS_Znorm2.cv_glm.calibration_plot.rds")

In [None]:
# Show the first calibration plot (all individuals) and save that same object.
# The plot is passed to ggsave() explicitly rather than relying on last_plot().
# NOTE(review): the file name hard-codes AFR.Phe_585.3.PRScsx -- confirm that
# element 1 of `cal_plot_list` is that score.
cal_plot_list[[1]]
ggsave('AOU.ALL.CKD.YK_Phenotyping.clinical_covariates.PGS_Znorm2.cv_glm.AFR.Phe_585.3.PRScsx.calibration_plot.png',
       plot = cal_plot_list[[1]])

In [None]:
# Show the first calibration plot (EUR individuals) and save that same object.
# The plot is passed to ggsave() explicitly rather than relying on last_plot().
# NOTE(review): the file name hard-codes AFR.Phe_585.3.PRScsx -- confirm that
# element 1 of `cal_plot_list_eur` is that score.
cal_plot_list_eur[[1]]
ggsave('AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.AFR.Phe_585.3.PRScsx.calibration_plot.png',
       plot = cal_plot_list_eur[[1]])

In [None]:
# Show the first calibration plot (AFR individuals) and save that same object.
# The plot is passed to ggsave() explicitly rather than relying on last_plot().
# NOTE(review): the file name hard-codes AFR.Phe_585.3.PRScsx -- confirm that
# element 1 of `cal_plot_list_afr` is that score.
cal_plot_list_afr[[1]]
ggsave('AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.AFR.Phe_585.3.PRScsx.calibration_plot.png',
       plot = cal_plot_list_afr[[1]])

## make wide form model metrics df

In [None]:
# Reshape the long metrics table (all individuals) so that each metric becomes
# its own column.
model_metrics_wide <- pivot_wider(
    model_metrics,
    names_from = 'metric',
    values_from = 'value'
)
head(model_metrics_wide)

In [None]:
# Reshape the long metrics table (EUR individuals) so that each metric becomes
# its own column.
model_metrics_wide_eur <- pivot_wider(
    model_metrics_eur,
    names_from = 'metric',
    values_from = 'value'
)
head(model_metrics_wide_eur)

In [None]:
# Reshape the long metrics table (AFR individuals) so that each metric becomes
# its own column.
model_metrics_wide_afr <- pivot_wider(
    model_metrics_afr,
    names_from = 'metric',
    values_from = 'value'
)
head(model_metrics_wide_afr)

## export model stats

In [None]:
# Export the long-format metrics for all individuals as a tab-separated file
# with a header row, no row names and no quoting.
write.table(model_metrics,
            file = 'AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.performance_metrics.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

In [None]:
# Export the wide-format metrics for all individuals as a comma-separated file
# with a header row, no row names and no quoting.
write.table(model_metrics_wide,
            file = 'AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.performance_metrics.wide_form.csv',
            sep = ',',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

In [None]:
# Export the long-format metrics for EUR individuals as a tab-separated file
# with a header row, no row names and no quoting.
write.table(model_metrics_eur,
            file = 'AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.performance_metrics.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

In [None]:
# Export the wide-format metrics for EUR individuals as a comma-separated file
# with a header row, no row names and no quoting.
write.table(model_metrics_wide_eur,
            file = 'AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.performance_metrics.wide_form.csv',
            sep = ',',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

In [None]:
# Export the long-format metrics for AFR individuals as a tab-separated file
# with a header row, no row names and no quoting.
write.table(model_metrics_afr,
            file = 'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.performance_metrics.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

In [None]:
# Export the wide-format metrics for AFR individuals as a comma-separated file
# with a header row, no row names and no quoting.
write.table(model_metrics_wide_afr,
            file = 'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.performance_metrics.wide_form.csv',
            sep = ',',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# bayesian analysis of resampling statistics

## run models on all individuals

In [None]:
# Fit perf_mod() on the `brier_class` metric of `workflows` (all individuals)
# and cache the fitted object on disk.
message('running model')
perf_model_brier <- perf_mod(
    workflows,
    metric = "brier_class",
    seed = 7,
    refresh = 0,
    iter = 10000,
    cores = 8
)
message('saving model')
saveRDS(
    perf_model_brier,
    file = 'AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.brier.rds'
)

In [None]:
# Fit perf_mod() on the `roc_auc` metric of `workflows` (all individuals)
# and cache the fitted object on disk.
message('running model')
perf_model_auc <- perf_mod(
    workflows,
    metric = "roc_auc",
    seed = 7,
    refresh = 0,
    iter = 10000,
    cores = 8
)
message('saving model')
saveRDS(
    perf_model_auc,
    file = 'AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.auc.rds'
)

## run models in all individuals with multiancestry scores

In [None]:
# Fit perf_mod() on the `brier_class` metric of `workflows_multiancestry_score`
# (all individuals, multiancestry scores) and cache the fitted object on disk.
message('running model')
perf_model_brier_multiancestry_score <- perf_mod(
    workflows_multiancestry_score,
    metric = "brier_class",
    seed = 7,
    refresh = 0,
    iter = 10000,
    cores = 8
)
message('saving model')
saveRDS(
    perf_model_brier_multiancestry_score,
    file = 'AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.brier.rds'
)

In [None]:
# Fit perf_mod() on the `roc_auc` metric of `workflows_multiancestry_score`
# (all individuals, multiancestry scores) and cache the fitted object on disk.
message('running model')
perf_model_auc_multiancestry_score <- perf_mod(
    workflows_multiancestry_score,
    metric = "roc_auc",
    seed = 7,
    refresh = 0,
    iter = 10000,
    cores = 8
)
message('saving model')
saveRDS(
    perf_model_auc_multiancestry_score,
    file = 'AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.auc.rds'
)

## run models on EUR individuals

In [None]:
# Fit perf_mod() on the `brier_class` metric of `workflows_eur` (EUR
# individuals) and cache the fitted object on disk.
message('running model')
perf_model_brier_eur <- perf_mod(
    workflows_eur,
    metric = "brier_class",
    seed = 7,
    refresh = 0,
    iter = 10000,
    cores = 8
)
message('saving model')
saveRDS(
    perf_model_brier_eur,
    file = 'AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.brier.rds'
)

In [None]:
# Fit perf_mod() on the `roc_auc` metric of `workflows_eur` (EUR individuals)
# and cache the fitted object on disk.
message('running model')
perf_model_auc_eur <- perf_mod(
    workflows_eur,
    metric = "roc_auc",
    seed = 7,
    refresh = 0,
    iter = 10000,
    cores = 8
)
message('saving model')
saveRDS(
    perf_model_auc_eur,
    file = 'AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.auc.rds'
)

## run models on EUR individuals with multiancestry scores

In [None]:
# Fit perf_mod() on the `brier_class` metric of
# `workflows_eur_multiancestry_score` (EUR individuals, multiancestry scores)
# and cache the fitted object on disk.
message('running model')
perf_model_brier_eur_multiancestry_score <- perf_mod(
    workflows_eur_multiancestry_score,
    metric = "brier_class",
    seed = 7,
    refresh = 0,
    iter = 10000,
    cores = 8
)
message('saving model')
saveRDS(
    perf_model_brier_eur_multiancestry_score,
    file = 'AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.brier.rds'
)

In [None]:
# Fit perf_mod() on the `roc_auc` metric of
# `workflows_eur_multiancestry_score` (EUR individuals, multiancestry scores)
# and cache the fitted object on disk.
message('running model')
perf_model_auc_eur_multiancestry_score <- perf_mod(
    workflows_eur_multiancestry_score,
    metric = "roc_auc",
    seed = 7,
    refresh = 0,
    iter = 10000,
    cores = 8
)
message('saving model')
saveRDS(
    perf_model_auc_eur_multiancestry_score,
    file = 'AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.auc.rds'
)

## run models on AFR individuals

In [None]:
# Fit perf_mod() on the `brier_class` metric of `workflows_afr` (AFR
# individuals) and cache the fitted object on disk.
message('running model')
perf_model_brier_afr <- perf_mod(
    workflows_afr,
    metric = "brier_class",
    seed = 7,
    refresh = 0,
    iter = 10000,
    cores = 8
)
message('saving model')
saveRDS(
    perf_model_brier_afr,
    file = 'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.brier.rds'
)

In [None]:
# Fit perf_mod() on the `roc_auc` metric of `workflows_afr` (AFR individuals)
# and cache the fitted object on disk.
message('running model')
perf_model_auc_afr <- perf_mod(
    workflows_afr,
    metric = "roc_auc",
    seed = 7,
    refresh = 0,
    iter = 10000,
    cores = 8
)
message('saving model')
saveRDS(
    perf_model_auc_afr,
    file = 'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.auc.rds'
)

## run models on AFR individuals with multiancestry scores

In [None]:
# Fit perf_mod() on the `brier_class` metric of
# `workflows_afr_multiancestry_score` (AFR individuals, multiancestry scores)
# and cache the fitted object on disk.
message('running model')
perf_model_brier_afr_multiancestry_score <- perf_mod(
    workflows_afr_multiancestry_score,
    metric = "brier_class",
    seed = 7,
    refresh = 0,
    iter = 10000,
    cores = 8
)
message('saving model')
saveRDS(
    perf_model_brier_afr_multiancestry_score,
    file = 'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.brier.rds'
)

In [None]:
# Fit perf_mod() on the `roc_auc` metric of
# `workflows_afr_multiancestry_score` (AFR individuals, multiancestry scores)
# and cache the fitted object on disk.
message('running model')
perf_model_auc_afr_multiancestry_score <- perf_mod(
    workflows_afr_multiancestry_score,
    metric = "roc_auc",
    seed = 7,
    refresh = 0,
    iter = 10000,
    cores = 8
)
message('saving model')
saveRDS(
    perf_model_auc_afr_multiancestry_score,
    file = 'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.auc.rds'
)

## process models

### all individuals

#### brier

In [None]:
# Brier (all individuals)
# Lower is better, so the reference model is the one with the smallest
# posterior mean.
best_brier <- perf_model_brier %>%
    tidy(seed = 7) %>%
    summary() %>%
    slice_min(mean) %>%
    pull(model)

# per-score practical-equivalence results at sizes 0.02, 0.01 and 0.005
contrast_models_brier_02 <- list()
contrast_models_brier_01 <- list()
contrast_models_brier_005 <- list()

for (score in score_list) {

    # posterior of the difference (this score vs. the best model)
    diff <- contrast_models(
        perf_model_brier,
        list_1 = score,
        list_2 = best_brier,
        seed = 7)

    # `probability` is only carried along from the 0.02 summary
    contrast_models_brier_02[[score]] <- diff %>%
        summary(size = 0.02) %>%
        mutate(model = score) %>%
        rename(pract_equiv_02 = pract_equiv) %>%
        select(model, pract_equiv_02, probability)

    contrast_models_brier_01[[score]] <- diff %>%
        summary(size = 0.01) %>%
        mutate(model = score) %>%
        rename(pract_equiv_01 = pract_equiv) %>%
        select(model, pract_equiv_01)

    contrast_models_brier_005[[score]] <- diff %>%
        summary(size = 0.005) %>%
        mutate(model = score) %>%
        rename(pract_equiv_005 = pract_equiv) %>%
        select(model, pract_equiv_005)
}

# Per-model posterior summary joined with the contrast results.
# tidy() is given the same seed as above so this table is reproducible and is
# built from the same posterior draws that selected `best_brier`.
#   CI_95      1 if the model's lower bound is <= the smallest upper bound
#   ROPE_*     1 if pract_equiv at that size is >= 0.95
#   prob_dif   0 if probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, stored as character
brier_ci <- perf_model_brier %>%
    broom::tidy(seed = 7) %>%
    summary(prob = 0.95) %>% # summary, probability 95%
    mutate(CI_95 = ifelse(lower <= min(upper), 1, 0)) %>%
    merge(bind_rows(contrast_models_brier_005), by = "model") %>%
    mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_brier_01), by = "model") %>%
    mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_brier_02), by = "model") %>%
    mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
    mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
    mutate(metric = "Brier Score") %>%
    mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
    arrange(desc(mean))

In [None]:
# Show the model selected by slice_min(mean) on the Brier posterior summary
head(best_brier)

#### auc

In [None]:
# AUC (all individuals)
# Higher is better, so the reference model is the one with the largest
# posterior mean.
best_auc <- perf_model_auc %>%
    tidy(seed = 7) %>%
    summary() %>%
    slice_max(mean) %>%
    pull(model)

# per-score practical-equivalence results at sizes 0.02, 0.01 and 0.005
contrast_models_auc_02 <- list()
contrast_models_auc_01 <- list()
contrast_models_auc_005 <- list()

for (score in score_list) {

    # posterior of the difference (best model vs. this score)
    diff <- contrast_models(
        perf_model_auc,
        list_1 = best_auc,
        list_2 = score,
        seed = 7)

    # `probability` is only carried along from the 0.02 summary
    contrast_models_auc_02[[score]] <- diff %>%
        summary(size = 0.02) %>%
        mutate(model = score) %>%
        rename(pract_equiv_02 = pract_equiv) %>%
        select(model, pract_equiv_02, probability)

    contrast_models_auc_01[[score]] <- diff %>%
        summary(size = 0.01) %>%
        mutate(model = score) %>%
        rename(pract_equiv_01 = pract_equiv) %>%
        select(model, pract_equiv_01)

    contrast_models_auc_005[[score]] <- diff %>%
        summary(size = 0.005) %>%
        mutate(model = score) %>%
        rename(pract_equiv_005 = pract_equiv) %>%
        select(model, pract_equiv_005)
}


# Probability = proportion of the posterior that is > 0 (probability that the positive difference is real)

# Per-model posterior summary joined with the contrast results.
# tidy() is given the same seed as above so this table is reproducible and is
# built from the same posterior draws that selected `best_auc`.
#   CI_95      1 if the model's upper bound is >= the largest lower bound
#   ROPE_*     1 if pract_equiv at that size is >= 0.95
#   prob_dif   0 if probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, stored as character
auc_ci <- perf_model_auc %>%
    broom::tidy(seed = 7) %>%
    summary(prob = 0.95) %>% # summary, probability 95%
    mutate(CI_95 = ifelse(upper >= max(lower), 1, 0)) %>%
    merge(bind_rows(contrast_models_auc_005), by = "model") %>%
    mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_auc_01), by = "model") %>%
    mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_auc_02), by = "model") %>%
    mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
    mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
    mutate(metric = "AU ROC") %>%
    mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
    arrange(desc(mean))

In [None]:
# Show the model selected by slice_max(mean) on the AUC posterior summary
head(best_auc)

### all individuals multiancestry scores

#### brier

In [None]:
# Brier (all individuals, multiancestry scores)
# Lower is better, so the reference model is the one with the smallest
# posterior mean.
best_brier_multiancestry_score <- perf_model_brier_multiancestry_score %>%
    tidy(seed = 7) %>%
    summary() %>%
    slice_min(mean) %>%
    pull(model)

# per-score practical-equivalence results at sizes 0.02, 0.01 and 0.005
contrast_models_brier_02 <- list()
contrast_models_brier_01 <- list()
contrast_models_brier_005 <- list()

for (score in score_list_multiancestry) {

    # posterior of the difference (this score vs. the best model)
    diff <- contrast_models(
        perf_model_brier_multiancestry_score,
        list_1 = score,
        list_2 = best_brier_multiancestry_score,
        seed = 7)

    # `probability` is only carried along from the 0.02 summary
    contrast_models_brier_02[[score]] <- diff %>%
        summary(size = 0.02) %>%
        mutate(model = score) %>%
        rename(pract_equiv_02 = pract_equiv) %>%
        select(model, pract_equiv_02, probability)

    contrast_models_brier_01[[score]] <- diff %>%
        summary(size = 0.01) %>%
        mutate(model = score) %>%
        rename(pract_equiv_01 = pract_equiv) %>%
        select(model, pract_equiv_01)

    contrast_models_brier_005[[score]] <- diff %>%
        summary(size = 0.005) %>%
        mutate(model = score) %>%
        rename(pract_equiv_005 = pract_equiv) %>%
        select(model, pract_equiv_005)
}

# Per-model posterior summary joined with the contrast results.
# tidy() is given the same seed as above so this table is reproducible and is
# built from the same posterior draws that selected the best model.
#   CI_95      1 if the model's lower bound is <= the smallest upper bound
#   ROPE_*     1 if pract_equiv at that size is >= 0.95
#   prob_dif   0 if probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, stored as character
brier_ci_multiancestry_score <- perf_model_brier_multiancestry_score %>%
    broom::tidy(seed = 7) %>%
    summary(prob = 0.95) %>% # summary, probability 95%
    mutate(CI_95 = ifelse(lower <= min(upper), 1, 0)) %>%
    merge(bind_rows(contrast_models_brier_005), by = "model") %>%
    mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_brier_01), by = "model") %>%
    mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_brier_02), by = "model") %>%
    mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
    mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
    mutate(metric = "Brier Score") %>%
    mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
    arrange(desc(mean))

In [None]:
# Show the model selected by slice_min(mean) on the Brier posterior summary
head(best_brier_multiancestry_score)

#### auc

In [None]:
# AUC (all individuals, multiancestry scores)
# Higher is better, so the reference model is the one with the largest
# posterior mean.
best_auc_multiancestry_score <- perf_model_auc_multiancestry_score %>%
    tidy(seed = 7) %>%
    summary() %>%
    slice_max(mean) %>%
    pull(model)

# per-score practical-equivalence results at sizes 0.02, 0.01 and 0.005
contrast_models_auc_02 <- list()
contrast_models_auc_01 <- list()
contrast_models_auc_005 <- list()

for (score in score_list_multiancestry) {

    # posterior of the difference (best model vs. this score)
    diff <- contrast_models(
        perf_model_auc_multiancestry_score,
        list_1 = best_auc_multiancestry_score,
        list_2 = score,
        seed = 7)

    # `probability` is only carried along from the 0.02 summary
    contrast_models_auc_02[[score]] <- diff %>%
        summary(size = 0.02) %>%
        mutate(model = score) %>%
        rename(pract_equiv_02 = pract_equiv) %>%
        select(model, pract_equiv_02, probability)

    contrast_models_auc_01[[score]] <- diff %>%
        summary(size = 0.01) %>%
        mutate(model = score) %>%
        rename(pract_equiv_01 = pract_equiv) %>%
        select(model, pract_equiv_01)

    contrast_models_auc_005[[score]] <- diff %>%
        summary(size = 0.005) %>%
        mutate(model = score) %>%
        rename(pract_equiv_005 = pract_equiv) %>%
        select(model, pract_equiv_005)
}

# Probability = proportion of the posterior that is > 0 (probability that the positive difference is real)
# Per-model posterior summary joined with the contrast results.
# tidy() is given the same seed as above so this table is reproducible and is
# built from the same posterior draws that selected the best model.
#   CI_95      1 if the model's upper bound is >= the largest lower bound
#   ROPE_*     1 if pract_equiv at that size is >= 0.95
#   prob_dif   0 if probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, stored as character
auc_ci_multiancestry_score <- perf_model_auc_multiancestry_score %>%
    broom::tidy(seed = 7) %>%
    summary(prob = 0.95) %>% # summary, probability 95%
    mutate(CI_95 = ifelse(upper >= max(lower), 1, 0)) %>%
    merge(bind_rows(contrast_models_auc_005), by = "model") %>%
    mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_auc_01), by = "model") %>%
    mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_auc_02), by = "model") %>%
    mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
    mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
    mutate(metric = "AU ROC") %>%
    mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
    arrange(desc(mean))

In [None]:
# Show the model selected by slice_max(mean) on the AUC posterior summary
head(best_auc_multiancestry_score)

### EUR individuals

#### brier

In [None]:
# Brier (EUR individuals)
# Lower is better, so the reference model is the one with the smallest
# posterior mean.
best_brier_eur <- perf_model_brier_eur %>%
    tidy(seed = 7) %>%
    summary() %>%
    slice_min(mean) %>%
    pull(model)

# per-score practical-equivalence results at sizes 0.02, 0.01 and 0.005
contrast_models_brier_02 <- list()
contrast_models_brier_01 <- list()
contrast_models_brier_005 <- list()

for (score in score_list) {

    # posterior of the difference (this score vs. the best model)
    diff <- contrast_models(
        perf_model_brier_eur,
        list_1 = score,
        list_2 = best_brier_eur,
        seed = 7)

    # `probability` is only carried along from the 0.02 summary
    contrast_models_brier_02[[score]] <- diff %>%
        summary(size = 0.02) %>%
        mutate(model = score) %>%
        rename(pract_equiv_02 = pract_equiv) %>%
        select(model, pract_equiv_02, probability)

    contrast_models_brier_01[[score]] <- diff %>%
        summary(size = 0.01) %>%
        mutate(model = score) %>%
        rename(pract_equiv_01 = pract_equiv) %>%
        select(model, pract_equiv_01)

    contrast_models_brier_005[[score]] <- diff %>%
        summary(size = 0.005) %>%
        mutate(model = score) %>%
        rename(pract_equiv_005 = pract_equiv) %>%
        select(model, pract_equiv_005)
}

# Per-model posterior summary joined with the contrast results.
# tidy() is given the same seed as above so this table is reproducible and is
# built from the same posterior draws that selected `best_brier_eur`.
#   CI_95      1 if the model's lower bound is <= the smallest upper bound
#   ROPE_*     1 if pract_equiv at that size is >= 0.95
#   prob_dif   0 if probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, stored as character
brier_ci_eur <- perf_model_brier_eur %>%
    broom::tidy(seed = 7) %>%
    summary(prob = 0.95) %>% # summary, probability 95%
    mutate(CI_95 = ifelse(lower <= min(upper), 1, 0)) %>%
    merge(bind_rows(contrast_models_brier_005), by = "model") %>%
    mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_brier_01), by = "model") %>%
    mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_brier_02), by = "model") %>%
    mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
    mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
    mutate(metric = "Brier Score") %>%
    mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
    arrange(desc(mean))

In [None]:
# Show the model selected by slice_min(mean) on the Brier posterior summary
head(best_brier_eur)

#### auc

In [None]:
# AUC (EUR individuals)
# Higher is better, so the reference model is the one with the largest
# posterior mean.
best_auc_eur <- perf_model_auc_eur %>%
    tidy(seed = 7) %>%
    summary() %>%
    slice_max(mean) %>%
    pull(model)

# per-score practical-equivalence results at sizes 0.02, 0.01 and 0.005
contrast_models_auc_02 <- list()
contrast_models_auc_01 <- list()
contrast_models_auc_005 <- list()

for (score in score_list) {

    # posterior of the difference (best model vs. this score)
    diff <- contrast_models(
        perf_model_auc_eur,
        list_1 = best_auc_eur,
        list_2 = score,
        seed = 7)

    # `probability` is only carried along from the 0.02 summary
    contrast_models_auc_02[[score]] <- diff %>%
        summary(size = 0.02) %>%
        mutate(model = score) %>%
        rename(pract_equiv_02 = pract_equiv) %>%
        select(model, pract_equiv_02, probability)

    contrast_models_auc_01[[score]] <- diff %>%
        summary(size = 0.01) %>%
        mutate(model = score) %>%
        rename(pract_equiv_01 = pract_equiv) %>%
        select(model, pract_equiv_01)

    contrast_models_auc_005[[score]] <- diff %>%
        summary(size = 0.005) %>%
        mutate(model = score) %>%
        rename(pract_equiv_005 = pract_equiv) %>%
        select(model, pract_equiv_005)
}

# Probability = proportion of the posterior that is > 0 (probability that the positive difference is real)
# Per-model posterior summary joined with the contrast results.
# tidy() is given the same seed as above so this table is reproducible and is
# built from the same posterior draws that selected `best_auc_eur`.
#   CI_95      1 if the model's upper bound is >= the largest lower bound
#   ROPE_*     1 if pract_equiv at that size is >= 0.95
#   prob_dif   0 if probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, stored as character
auc_ci_eur <- perf_model_auc_eur %>%
    broom::tidy(seed = 7) %>%
    summary(prob = 0.95) %>% # summary, probability 95%
    mutate(CI_95 = ifelse(upper >= max(lower), 1, 0)) %>%
    merge(bind_rows(contrast_models_auc_005), by = "model") %>%
    mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_auc_01), by = "model") %>%
    mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_auc_02), by = "model") %>%
    mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
    mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
    mutate(metric = "AU ROC") %>%
    mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
    arrange(desc(mean))

In [None]:
# Show the model selected by slice_max(mean) on the AUC posterior summary
head(best_auc_eur)

### eur individuals multiancestry scores

#### brier

In [None]:
# Brier (EUR individuals, multiancestry scores)
# Lower is better, so the reference model is the one with the smallest
# posterior mean.
best_brier_eur_multiancestry_score <- perf_model_brier_eur_multiancestry_score %>%
    tidy(seed = 7) %>%
    summary() %>%
    slice_min(mean) %>%
    pull(model)

# per-score practical-equivalence results at sizes 0.02, 0.01 and 0.005
contrast_models_brier_02 <- list()
contrast_models_brier_01 <- list()
contrast_models_brier_005 <- list()
# initialised here but not filled in this cell
post_diff_brier_eur <- list()


for (score in score_list_multiancestry) {

    # posterior of the difference (this score vs. the best model)
    diff <- contrast_models(
        perf_model_brier_eur_multiancestry_score,
        list_1 = score,
        list_2 = best_brier_eur_multiancestry_score,
        seed = 7)

    # `probability` is only carried along from the 0.02 summary
    contrast_models_brier_02[[score]] <- diff %>%
        summary(size = 0.02) %>%
        mutate(model = score) %>%
        rename(pract_equiv_02 = pract_equiv) %>%
        select(model, pract_equiv_02, probability)

    contrast_models_brier_01[[score]] <- diff %>%
        summary(size = 0.01) %>%
        mutate(model = score) %>%
        rename(pract_equiv_01 = pract_equiv) %>%
        select(model, pract_equiv_01)

    contrast_models_brier_005[[score]] <- diff %>%
        summary(size = 0.005) %>%
        mutate(model = score) %>%
        rename(pract_equiv_005 = pract_equiv) %>%
        select(model, pract_equiv_005)

}

# Per-model posterior summary joined with the contrast results.
# tidy() is given the same seed as above so this table is reproducible and is
# built from the same posterior draws that selected the best model.
#   CI_95      1 if the model's lower bound is <= the smallest upper bound
#   ROPE_*     1 if pract_equiv at that size is >= 0.95
#   prob_dif   0 if probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, stored as character
brier_ci_eur_multiancestry_score <- perf_model_brier_eur_multiancestry_score %>%
    broom::tidy(seed = 7) %>%
    summary(prob = 0.95) %>% # summary, probability 95%
    mutate(CI_95 = ifelse(lower <= min(upper), 1, 0)) %>%
    merge(bind_rows(contrast_models_brier_005), by = "model") %>%
    mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_brier_01), by = "model") %>%
    mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_brier_02), by = "model") %>%
    mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
    mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
    mutate(metric = "Brier Score") %>%
    mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
    arrange(desc(mean))

In [None]:
# Show the model selected by slice_min(mean) on the Brier posterior summary
head(best_brier_eur_multiancestry_score)

#### auc

In [None]:
# AUC (EUR individuals, multiancestry scores)
# Higher is better, so the reference model is the one with the largest
# posterior mean.
best_auc_eur_multiancestry_score <- perf_model_auc_eur_multiancestry_score %>%
    tidy(seed = 7) %>%
    summary() %>%
    slice_max(mean) %>%
    pull(model)

# per-score practical-equivalence results at sizes 0.02, 0.01 and 0.005
contrast_models_auc_02 <- list()
contrast_models_auc_01 <- list()
contrast_models_auc_005 <- list()

for (score in score_list_multiancestry) {

    # posterior of the difference (best model vs. this score)
    diff <- contrast_models(
        perf_model_auc_eur_multiancestry_score,
        list_1 = best_auc_eur_multiancestry_score,
        list_2 = score,
        seed = 7)

    # `probability` is only carried along from the 0.02 summary
    contrast_models_auc_02[[score]] <- diff %>%
        summary(size = 0.02) %>%
        mutate(model = score) %>%
        rename(pract_equiv_02 = pract_equiv) %>%
        select(model, pract_equiv_02, probability)

    contrast_models_auc_01[[score]] <- diff %>%
        summary(size = 0.01) %>%
        mutate(model = score) %>%
        rename(pract_equiv_01 = pract_equiv) %>%
        select(model, pract_equiv_01)

    contrast_models_auc_005[[score]] <- diff %>%
        summary(size = 0.005) %>%
        mutate(model = score) %>%
        rename(pract_equiv_005 = pract_equiv) %>%
        select(model, pract_equiv_005)
}

# Probability = proportion of the posterior that is > 0 (probability that the positive difference is real)
# Per-model posterior summary joined with the contrast results.
# tidy() is given the same seed as above so this table is reproducible and is
# built from the same posterior draws that selected the best model.
#   CI_95      1 if the model's upper bound is >= the largest lower bound
#   ROPE_*     1 if pract_equiv at that size is >= 0.95
#   prob_dif   0 if probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, stored as character
auc_ci_eur_multiancestry_score <- perf_model_auc_eur_multiancestry_score %>%
    broom::tidy(seed = 7) %>%
    summary(prob = 0.95) %>% # summary, probability 95%
    mutate(CI_95 = ifelse(upper >= max(lower), 1, 0)) %>%
    merge(bind_rows(contrast_models_auc_005), by = "model") %>%
    mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_auc_01), by = "model") %>%
    mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_auc_02), by = "model") %>%
    mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
    mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
    mutate(metric = "AU ROC") %>%
    mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
    arrange(desc(mean))

In [None]:
# Show the model selected by slice_max(mean) on the AUC posterior summary
head(best_auc_eur_multiancestry_score)

### afr

#### brier

In [None]:
# Brier (AFR individuals)
# Lower is better, so the reference model is the one with the smallest
# posterior mean.
best_brier_afr <- perf_model_brier_afr %>%
    tidy(seed = 7) %>%
    summary() %>%
    slice_min(mean) %>%
    pull(model)


# per-score practical-equivalence results at sizes 0.02, 0.01 and 0.005
contrast_models_brier_02 <- list()
contrast_models_brier_01 <- list()
contrast_models_brier_005 <- list()


for (score in score_list) {

    # posterior of the difference (this score vs. the best model)
    diff <- contrast_models(
        perf_model_brier_afr,
        list_1 = score,
        list_2 = best_brier_afr,
        seed = 7)

    # `probability` is only carried along from the 0.02 summary
    contrast_models_brier_02[[score]] <- diff %>%
        summary(size = 0.02) %>%
        mutate(model = score) %>%
        rename(pract_equiv_02 = pract_equiv) %>%
        select(model, pract_equiv_02, probability)

    contrast_models_brier_01[[score]] <- diff %>%
        summary(size = 0.01) %>%
        mutate(model = score) %>%
        rename(pract_equiv_01 = pract_equiv) %>%
        select(model, pract_equiv_01)

    contrast_models_brier_005[[score]] <- diff %>%
        summary(size = 0.005) %>%
        mutate(model = score) %>%
        rename(pract_equiv_005 = pract_equiv) %>%
        select(model, pract_equiv_005)

}

# Per-model posterior summary joined with the contrast results.
# tidy() is given the same seed as above so this table is reproducible and is
# built from the same posterior draws that selected `best_brier_afr`.
#   CI_95      1 if the model's lower bound is <= the smallest upper bound
#   ROPE_*     1 if pract_equiv at that size is >= 0.95
#   prob_dif   0 if probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, stored as character
brier_ci_afr <- perf_model_brier_afr %>%
    broom::tidy(seed = 7) %>%
    summary(prob = 0.95) %>% # summary, probability 95%
    mutate(CI_95 = ifelse(lower <= min(upper), 1, 0)) %>%
    merge(bind_rows(contrast_models_brier_005), by = "model") %>%
    mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_brier_01), by = "model") %>%
    mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_brier_02), by = "model") %>%
    mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
    mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
    mutate(metric = "Brier Score") %>%
    mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
    arrange(desc(mean))

In [None]:
# Show the model selected by slice_min(mean) on the Brier posterior summary
head(best_brier_afr)

#### auc

In [None]:
# AUC (AFR individuals)
# Higher is better, so the reference model is the one with the largest
# posterior mean.
best_auc_afr <- perf_model_auc_afr %>%
    tidy(seed = 7) %>%
    summary() %>%
    slice_max(mean) %>%
    pull(model)

# per-score practical-equivalence results at sizes 0.02, 0.01 and 0.005
contrast_models_auc_02 <- list()
contrast_models_auc_01 <- list()
contrast_models_auc_005 <- list()

for (score in score_list) {

    # posterior of the difference (best model vs. this score)
    diff <- contrast_models(
        perf_model_auc_afr,
        list_1 = best_auc_afr,
        list_2 = score,
        seed = 7)

    # `probability` is only carried along from the 0.02 summary
    contrast_models_auc_02[[score]] <- diff %>%
        summary(size = 0.02) %>%
        mutate(model = score) %>%
        rename(pract_equiv_02 = pract_equiv) %>%
        select(model, pract_equiv_02, probability)

    contrast_models_auc_01[[score]] <- diff %>%
        summary(size = 0.01) %>%
        mutate(model = score) %>%
        rename(pract_equiv_01 = pract_equiv) %>%
        select(model, pract_equiv_01)

    contrast_models_auc_005[[score]] <- diff %>%
        summary(size = 0.005) %>%
        mutate(model = score) %>%
        rename(pract_equiv_005 = pract_equiv) %>%
        select(model, pract_equiv_005)
}

# Probability = proportion of the posterior that is > 0 (probability that the positive difference is real)
# Per-model posterior summary joined with the contrast results.
# tidy() is given the same seed as above so this table is reproducible and is
# built from the same posterior draws that selected `best_auc_afr`.
#   CI_95      1 if the model's upper bound is >= the largest lower bound
#   ROPE_*     1 if pract_equiv at that size is >= 0.95
#   prob_dif   0 if probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, stored as character
auc_ci_afr <- perf_model_auc_afr %>%
    broom::tidy(seed = 7) %>%
    summary(prob = 0.95) %>% # summary, probability 95%
    mutate(CI_95 = ifelse(upper >= max(lower), 1, 0)) %>%
    merge(bind_rows(contrast_models_auc_005), by = "model") %>%
    mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_auc_01), by = "model") %>%
    mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_auc_02), by = "model") %>%
    mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
    mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
    mutate(metric = "AU ROC") %>%
    mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
    arrange(desc(mean))

In [None]:
# Show the model selected by slice_max(mean) on the AUC posterior summary
head(best_auc_afr)

### afr individuals multiancestry score

#### brier

In [None]:
# Brier (AFR individuals, multiancestry scores)
# Lower is better, so the reference model is the one with the smallest
# posterior mean.
best_brier_afr_multiancestry_score <- perf_model_brier_afr_multiancestry_score %>%
    tidy(seed = 7) %>%
    summary() %>%
    slice_min(mean) %>%
    pull(model)

# per-score practical-equivalence results at sizes 0.02, 0.01 and 0.005
contrast_models_brier_02 <- list()
contrast_models_brier_01 <- list()
contrast_models_brier_005 <- list()
# initialised here but not filled in this cell
post_diff_brier_afr <- list()

for (score in score_list_multiancestry) {

    # posterior of the difference (this score vs. the best model)
    diff <- contrast_models(
        perf_model_brier_afr_multiancestry_score,
        list_1 = score,
        list_2 = best_brier_afr_multiancestry_score,
        seed = 7)

    # `probability` is only carried along from the 0.02 summary
    contrast_models_brier_02[[score]] <- diff %>%
        summary(size = 0.02) %>%
        mutate(model = score) %>%
        rename(pract_equiv_02 = pract_equiv) %>%
        select(model, pract_equiv_02, probability)

    contrast_models_brier_01[[score]] <- diff %>%
        summary(size = 0.01) %>%
        mutate(model = score) %>%
        rename(pract_equiv_01 = pract_equiv) %>%
        select(model, pract_equiv_01)

    contrast_models_brier_005[[score]] <- diff %>%
        summary(size = 0.005) %>%
        mutate(model = score) %>%
        rename(pract_equiv_005 = pract_equiv) %>%
        select(model, pract_equiv_005)

}

# Per-model posterior summary joined with the contrast results.
# tidy() is given the same seed as above so this table is reproducible and is
# built from the same posterior draws that selected the best model.
#   CI_95      1 if the model's lower bound is <= the smallest upper bound
#   ROPE_*     1 if pract_equiv at that size is >= 0.95
#   prob_dif   0 if probability >= 0.95, else 1
#   ROPE_color number of ROPE flags set, stored as character
brier_ci_afr_multiancestry_score <- perf_model_brier_afr_multiancestry_score %>%
    broom::tidy(seed = 7) %>%
    summary(prob = 0.95) %>% # summary, probability 95%
    mutate(CI_95 = ifelse(lower <= min(upper), 1, 0)) %>%
    merge(bind_rows(contrast_models_brier_005), by = "model") %>%
    mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_brier_01), by = "model") %>%
    mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
    merge(bind_rows(contrast_models_brier_02), by = "model") %>%
    mutate(ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1)) %>%
    mutate(prob_dif = ifelse(probability >= 0.95, 0, 1)) %>%
    mutate(metric = "Brier Score") %>%
    mutate(ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)) %>%
    arrange(desc(mean))

In [None]:
# Show the model name selected as the lowest-mean-Brier reference.
head(best_brier_afr_multiancestry_score)

#### auc

In [None]:
# AUC -- AFR individuals, multiancestry scores.
# Reference model = the score with the highest posterior mean AUC.
# with_ties = FALSE guarantees exactly one reference model for the contrasts.
best_auc_afr_multiancestry_score <- tidy(perf_model_auc_afr_multiancestry_score, seed = 7) %>%
  summary() %>%
  slice_max(mean, n = 1, with_ties = FALSE) %>%
  pull(model)

# One list per ROPE size; each holds one contrast-summary row per score.
contrast_models_auc_02 <- list()
contrast_models_auc_01 <- list()
contrast_models_auc_005 <- list()

# NOTE(review): the loop variable `score` replaces the global `score` table
# read at the top of the notebook.
for (score in score_list_multiancestry) {
  # Posterior contrast of the best model (list_1) against this score (list_2)
  diff <- contrast_models(
    perf_model_auc_afr_multiancestry_score,
    list_1 = best_auc_afr_multiancestry_score,
    list_2 = score,
    seed = 7
  )

  model <- score # not read inside this loop; kept in case later cells use it

  # ROPE 0.02 summary also carries `probability` for the prob_dif flag below
  contrast_models_auc_02[[score]] <- diff %>%
    summary(size = 0.02) %>%
    mutate(model = score) %>%
    rename(pract_equiv_02 = pract_equiv) %>%
    select(model, pract_equiv_02, probability)

  contrast_models_auc_01[[score]] <- diff %>%
    summary(size = 0.01) %>%
    mutate(model = score) %>%
    rename(pract_equiv_01 = pract_equiv) %>%
    select(model, pract_equiv_01)

  contrast_models_auc_005[[score]] <- diff %>%
    summary(size = 0.005) %>%
    mutate(model = score) %>%
    rename(pract_equiv_005 = pract_equiv) %>%
    select(model, pract_equiv_005)
}

# Per-model posterior AUC summary with equivalence flags.
# `probability` = proportion of the posterior difference that is > 0
# (probability that the positive difference is real).
auc_ci_afr_multiancestry_score <- perf_model_auc_afr_multiancestry_score %>%
  # Same seed as above so the table's means/intervals are reproducible and
  # agree with the draws used to pick the reference model.
  broom::tidy(seed = 7) %>%
  summary(prob = 0.95) %>% # posterior mean plus 95% interval per model
  # 1 when the model's upper bound reaches the largest lower bound
  mutate(CI_95 = ifelse(upper >= max(lower), 1, 0)) %>%
  merge(bind_rows(contrast_models_auc_005), by = "model") %>%
  # ROPE_* = 1 when pract_equiv is at least 0.95 for that ROPE size
  mutate(ROPE_005 = ifelse(pract_equiv_005 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_01), by = "model") %>%
  mutate(ROPE_01 = ifelse(pract_equiv_01 < 0.95, 0, 1)) %>%
  merge(bind_rows(contrast_models_auc_02), by = "model") %>%
  mutate(
    ROPE_02 = ifelse(pract_equiv_02 < 0.95, 0, 1),
    # prob_dif = 0 when the difference probability is >= 0.95, else 1
    prob_dif = ifelse(probability >= 0.95, 0, 1),
    metric = "AU ROC",
    ROPE_color = as.character(ROPE_005 + ROPE_01 + ROPE_02)
  ) %>%
  arrange(desc(mean))

In [None]:
# Show the model name selected as the highest-mean-AUC reference.
head(best_auc_afr_multiancestry_score)

## combine models

### all individuals

In [None]:
# AUC and Brier: stack the two per-metric summary tables (Brier rows first),
# then report the row count and preview the result.
model_metrics_df <- brier_ci %>% rbind(auc_ci)
nrow(model_metrics_df)
head(model_metrics_df)

### all individuals multiancestry scores

In [None]:
# AUC and Brier: stack the two per-metric summary tables (Brier rows first),
# then report the row count and preview the result.
model_metrics_df_multiancestry_score <- brier_ci_multiancestry_score %>%
  rbind(auc_ci_multiancestry_score)
nrow(model_metrics_df_multiancestry_score)
head(model_metrics_df_multiancestry_score)

### eur

In [None]:
# AUC and Brier: stack the two per-metric summary tables (Brier rows first),
# then report the row count and preview the result.
model_metrics_df_eur <- brier_ci_eur %>% rbind(auc_ci_eur)
nrow(model_metrics_df_eur)
head(model_metrics_df_eur)

### eur individuals multiancestry scores

In [None]:
# AUC and Brier: stack the two per-metric summary tables (Brier rows first),
# then report the row count and preview the result.
model_metrics_df_eur_multiancestry_score <- brier_ci_eur_multiancestry_score %>%
  rbind(auc_ci_eur_multiancestry_score)
nrow(model_metrics_df_eur_multiancestry_score)
head(model_metrics_df_eur_multiancestry_score)

### afr

In [None]:
# AUC and Brier: stack the two per-metric summary tables (Brier rows first),
# then report the row count and preview the result.
model_metrics_df_afr <- brier_ci_afr %>% rbind(auc_ci_afr)
nrow(model_metrics_df_afr)
head(model_metrics_df_afr)

### afr individuals multiancestry scores

In [None]:
# AUC and Brier: stack the two per-metric summary tables (Brier rows first),
# then report the row count and preview the result.
model_metrics_df_afr_multiancestry_score <- brier_ci_afr_multiancestry_score %>%
  rbind(auc_ci_afr_multiancestry_score)
nrow(model_metrics_df_afr_multiancestry_score)
head(model_metrics_df_afr_multiancestry_score)

## export

### all individuals

In [None]:
# Save output: tab-delimited table with a header row, no row names, no quoting.
# TRUE/FALSE are spelled out because the T/F shortcuts can be reassigned.
write.table(
  model_metrics_df,
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)


### all individuals multiancestry scores

In [None]:
# Save output: tab-delimited table with a header row, no row names, no quoting.
# TRUE/FALSE are spelled out because the T/F shortcuts can be reassigned.
write.table(
  model_metrics_df_multiancestry_score,
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

### eur

In [None]:
# Save output: tab-delimited table with a header row, no row names, no quoting.
# TRUE/FALSE are spelled out because the T/F shortcuts can be reassigned.
write.table(
  model_metrics_df_eur,
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

### eur individuals multiancestry scores

In [None]:
# Save output: tab-delimited table with a header row, no row names, no quoting.
# TRUE/FALSE are spelled out because the T/F shortcuts can be reassigned.
write.table(
  model_metrics_df_eur_multiancestry_score,
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

### afr

In [None]:
# Save output: tab-delimited table with a header row, no row names, no quoting.
# TRUE/FALSE are spelled out because the T/F shortcuts can be reassigned.
write.table(
  model_metrics_df_afr,
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

### afr individuals multiancestry scores

In [None]:
# Save output: tab-delimited table with a header row, no row names, no quoting.
# TRUE/FALSE are spelled out because the T/F shortcuts can be reassigned.
write.table(
  model_metrics_df_afr_multiancestry_score,
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

## make and export variance plots

### All

#### ROPE 0.02

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_02 flags.
model_metrics_plotCI_rope_02 <- model_metrics_df %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_02 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_02 == 1 & prob_dif != 1 ~ "1",
    ROPE_02 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU ALL CKD Variance by ROPE 0.02") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.02)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_02
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.02.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_02)

#### ROPE 0.01

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_01 flags.
model_metrics_plotCI_rope_01 <- model_metrics_df %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_01 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_01 == 1 & prob_dif != 1 ~ "1",
    ROPE_01 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU ALL CKD Variance by ROPE 0.01") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.01)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_01
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.01.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_01)

#### ROPE 0.005

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_005 flags.
model_metrics_plotCI_rope_005 <- model_metrics_df %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_005 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_005 == 1 & prob_dif != 1 ~ "1",
    ROPE_005 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU ALL CKD Variance by ROPE 0.005") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.005)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_005
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.005.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_005)

### all individuals multiancestry scores

#### rope 0.02

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_02 flags.
model_metrics_plotCI_rope_02_multiancestry_score <- model_metrics_df_multiancestry_score %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_02 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_02 == 1 & prob_dif != 1 ~ "1",
    ROPE_02 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU ALL CKD Variance by ROPE 0.02") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.02)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_02_multiancestry_score
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.02.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_02_multiancestry_score)

#### rope 0.01

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_01 flags.
model_metrics_plotCI_rope_01_multiancestry_score <- model_metrics_df_multiancestry_score %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_01 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_01 == 1 & prob_dif != 1 ~ "1",
    ROPE_01 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU ALL CKD Variance by ROPE 0.01") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.01)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_01_multiancestry_score
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.01.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_01_multiancestry_score)

#### rope 0.005

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_005 flags.
model_metrics_plotCI_rope_005_multiancestry_score <- model_metrics_df_multiancestry_score %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_005 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_005 == 1 & prob_dif != 1 ~ "1",
    ROPE_005 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU ALL CKD Variance by ROPE 0.005") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.005)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_005_multiancestry_score
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.005.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_005_multiancestry_score)

### EUR

#### ROPE 0.02

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_02 flags.
model_metrics_plotCI_rope_02_eur <- model_metrics_df_eur %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_02 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_02 == 1 & prob_dif != 1 ~ "1",
    ROPE_02 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU EUR CKD Variance by ROPE 0.02") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.02)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_02_eur
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.02.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_02_eur)

#### ROPE 0.01

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_01 flags.
model_metrics_plotCI_rope_01_eur <- model_metrics_df_eur %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_01 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_01 == 1 & prob_dif != 1 ~ "1",
    ROPE_01 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU EUR CKD Variance by ROPE 0.01") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.01)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_01_eur
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.01.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_01_eur)

#### ROPE 0.005

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_005 flags.
model_metrics_plotCI_rope_005_eur <- model_metrics_df_eur %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_005 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_005 == 1 & prob_dif != 1 ~ "1",
    ROPE_005 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU EUR CKD Variance by ROPE 0.005") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.005)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_005_eur
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.005.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_005_eur)

### eur individuals multiancestry scores

#### rope 0.02

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_02 flags.
model_metrics_plotCI_rope_02_eur_multiancestry_score <- model_metrics_df_eur_multiancestry_score %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_02 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_02 == 1 & prob_dif != 1 ~ "1",
    ROPE_02 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU EUR CKD Variance by ROPE 0.02") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.02)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_02_eur_multiancestry_score
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.02.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_02_eur_multiancestry_score)

#### rope 0.01

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_01 flags.
model_metrics_plotCI_rope_01_eur_multiancestry_score <- model_metrics_df_eur_multiancestry_score %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_01 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_01 == 1 & prob_dif != 1 ~ "1",
    ROPE_01 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU EUR CKD Variance by ROPE 0.01") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.01)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_01_eur_multiancestry_score
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.01.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_01_eur_multiancestry_score)

#### rope 0.005

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_005 flags.
model_metrics_plotCI_rope_005_eur_multiancestry_score <- model_metrics_df_eur_multiancestry_score %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_005 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_005 == 1 & prob_dif != 1 ~ "1",
    ROPE_005 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU EUR CKD Variance by ROPE 0.005") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.005)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_005_eur_multiancestry_score
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.005.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_005_eur_multiancestry_score)

### AFR

#### ROPE 0.02

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_02 flags.
model_metrics_plotCI_rope_02_afr <- model_metrics_df_afr %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_02 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_02 == 1 & prob_dif != 1 ~ "1",
    ROPE_02 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU AFR CKD Variance by ROPE 0.02") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.02)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_02_afr
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.02.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_02_afr)

#### ROPE 0.01

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_01 flags.
model_metrics_plotCI_rope_01_afr <- model_metrics_df_afr %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_01 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_01 == 1 & prob_dif != 1 ~ "1",
    ROPE_01 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU AFR CKD Variance by ROPE 0.01") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.01)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_01_afr
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.01.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_01_afr)

#### ROPE 0.005

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_005 flags.
model_metrics_plotCI_rope_005_afr <- model_metrics_df_afr %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_005 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_005 == 1 & prob_dif != 1 ~ "1",
    ROPE_005 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU AFR CKD Variance by ROPE 0.005") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.005)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_005_afr
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.rope_0.005.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_005_afr)

### afr individuals multiancestry scores

#### rope 0.02

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_02 flags.
model_metrics_plotCI_rope_02_afr_multiancestry_score <- model_metrics_df_afr_multiancestry_score %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_02 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_02 == 1 & prob_dif != 1 ~ "1",
    ROPE_02 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU AFR CKD Variance by ROPE 0.02") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.02)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_02_afr_multiancestry_score
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.02.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_02_afr_multiancestry_score)

#### rope 0.01

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_01 flags.
model_metrics_plotCI_rope_01_afr_multiancestry_score <- model_metrics_df_afr_multiancestry_score %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_01 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_01 == 1 & prob_dif != 1 ~ "1",
    ROPE_01 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU AFR CKD Variance by ROPE 0.01") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.01)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_01_afr_multiancestry_score
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.01.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_01_afr_multiancestry_score)

#### rope 0.005

In [None]:
# Interval plot, one facet per metric: each score's mean with its lower/upper
# bounds, coloured by the prob_dif / ROPE_005 flags.
model_metrics_plotCI_rope_005_afr_multiancestry_score <- model_metrics_df_afr_multiancestry_score %>%
  # prob_dif == 1 always maps to "2", so a separate
  # `prob_dif == 1 & ROPE_005 == 1` branch would be unreachable.
  mutate(sig_color = case_when(
    prob_dif == 1 ~ "2",
    ROPE_005 == 1 & prob_dif != 1 ~ "1",
    ROPE_005 != 1 & prob_dif != 1 ~ "0"
  )) %>%
  group_by(metric) %>%
  mutate(name = factor(model, levels = model)) %>% # levels follow row order within each metric
  ungroup() %>%
  ggplot(aes(x = mean, y = name, xmin = lower, xmax = upper, color = sig_color)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbarh(height = 0.2, position = position_dodge(width = 0.5)) +
  labs(x = "95% Credible Interval ", y = "Score", title = "AOU AFR CKD Variance by ROPE 0.005") +
  theme_light() +
  theme(legend.position = "top",
        strip.text = element_text(size = 14, colour = "black")) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red", "2" = "blue"),
    name = "",
    labels = c("0" = "Significantly Different",
               "1" = "Practically Equivalent (ROPE 0.005)",
               "2" = "Statistically Equivalent")
  ) +
  facet_wrap(~ metric, scales = "free_x")

model_metrics_plotCI_rope_005_afr_multiancestry_score
# Name the plot explicitly instead of relying on ggplot2's last_plot().
ggsave("AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.rope_0.005.equivalance_plot.png",
       plot = model_metrics_plotCI_rope_005_afr_multiancestry_score)

# individual score percentile

## calculate

### all individuals

In [None]:
# Drop the PGS entries matched by the pattern, then express each Z_norm2 value
# as a percentile of the standard normal distribution (100 * pnorm).
df_ntile_norm <- all_score %>%
  filter(
    !grepl("PGS002757|PGS005090|AFR.eGFR.PRScsx|AMR.eGFR.PRScsx|EAS.eGFR.PRScsx|EUR.eGFR.PRScsx", PGS)
  ) %>%
  dplyr::mutate(ntile = pnorm(Z_norm2) * 100)
# Preview the rows and list the scores that remain.
head(df_ntile_norm)
unique(df_ntile_norm[["PGS"]])

### all individuals multiancestry scores

In [None]:
# Drop the PGS entries matched by the pattern, then express each Z_norm2 value
# as a percentile of the standard normal distribution (100 * pnorm).
# NOTE(review): `|` binds loosest, so this pattern removes any PGS containing
# "AFR", "AMR" or "EAS" anywhere, but "EUR" only when followed by
# `.eGFR.PRScsx` -- confirm this asymmetry is intended.
df_ntile_norm_multiancestry_score <- all_score %>%
  filter(
    !grepl("PGS002757|PGS005090|AFR|AMR|EAS|EUR.eGFR.PRScsx", PGS)
  ) %>%
  dplyr::mutate(ntile = pnorm(Z_norm2) * 100)
# Preview the rows and count the scores that remain.
head(df_ntile_norm_multiancestry_score)
length(unique(df_ntile_norm_multiancestry_score[["PGS"]]))

### EUR

In [None]:
# Drop the PGS entries matched by the pattern, then express each Z_norm2 value
# as a percentile of the standard normal distribution (100 * pnorm).
df_ntile_norm_eur <- all_score_eur %>%
  filter(
    !grepl("PGS002757|PGS005090|AFR.eGFR.PRScsx|AMR.eGFR.PRScsx|EAS.eGFR.PRScsx|EUR.eGFR.PRScsx", PGS)
  ) %>%
  dplyr::mutate(ntile = pnorm(Z_norm2) * 100)
# Preview the rows and list the scores that remain.
head(df_ntile_norm_eur)
unique(df_ntile_norm_eur[["PGS"]])

### eur individuals multiancestry scores

In [None]:
# Drop the PGS entries matched by the pattern, then express each Z_norm2 value
# as a percentile of the standard normal distribution (100 * pnorm).
# NOTE(review): `|` binds loosest, so this pattern removes any PGS containing
# "AFR", "AMR" or "EAS" anywhere, but "EUR" only when followed by
# `.eGFR.PRScsx` -- confirm this asymmetry is intended.
df_ntile_norm_eur_multiancestry_score <- all_score_eur %>%
  filter(
    !grepl("PGS002757|PGS005090|AFR|AMR|EAS|EUR.eGFR.PRScsx", PGS)
  ) %>%
  dplyr::mutate(ntile = pnorm(Z_norm2) * 100)
# Preview the rows and count the scores that remain.
head(df_ntile_norm_eur_multiancestry_score)
length(unique(df_ntile_norm_eur_multiancestry_score[["PGS"]]))

### afr

In [None]:
# Percentile table built from all_score_afr: excluded PGS IDs are dropped and
# Z_norm2 is mapped to 0-100 through pnorm().
df_ntile_norm_afr <- dplyr::mutate(
  filter(
    all_score_afr,
    !grepl("PGS002757|PGS005090|AFR.eGFR.PRScsx|AMR.eGFR.PRScsx|EAS.eGFR.PRScsx|EUR.eGFR.PRScsx", PGS)
  ),
  ntile = pnorm(Z_norm2) * 100
)
head(df_ntile_norm_afr)
unique(df_ntile_norm_afr[["PGS"]])

### afr individuals multiancestry scores

In [None]:
# Multiancestry score set for all_score_afr. The pattern removes any PGS name
# containing "AFR", "AMR" or "EAS", plus "EUR.eGFR.PRScsx" and the two PGS IDs.
# NOTE(review): confirm the bare AFR/AMR/EAS alternatives are intentional.
df_ntile_norm_afr_multiancestry_score <- dplyr::mutate(
  filter(
    all_score_afr,
    !grepl("PGS002757|PGS005090|AFR|AMR|EAS|EUR.eGFR.PRScsx", PGS)
  ),
  ntile = pnorm(Z_norm2) * 100
)
head(df_ntile_norm_afr_multiancestry_score)
length(unique(df_ntile_norm_afr_multiancestry_score[["PGS"]]))

### PRScs iterations

In [None]:
# Add a 0-100 percentile (100 * pnorm(Z_norm2)) to every row of afr_egfr_merge
# and report how many distinct PGS labels it holds.
df_ntile_norm_afr_egfr <- dplyr::mutate(afr_egfr_merge, ntile = pnorm(Z_norm2) * 100)
head(df_ntile_norm_afr_egfr)
length(unique(df_ntile_norm_afr_egfr[["PGS"]]))

In [None]:
# Add a 0-100 percentile (100 * pnorm(Z_norm2)) to every row of eur_egfr_merge
# and report how many distinct PGS labels it holds.
df_ntile_norm_eur_egfr <- dplyr::mutate(eur_egfr_merge, ntile = pnorm(Z_norm2) * 100)
head(df_ntile_norm_eur_egfr)
length(unique(df_ntile_norm_eur_egfr[["PGS"]]))

In [None]:
# Add a 0-100 percentile (100 * pnorm(Z_norm2)) to every row of meta_egfr_merge
# and report how many distinct PGS labels it holds.
df_ntile_norm_meta_egfr <- dplyr::mutate(meta_egfr_merge, ntile = pnorm(Z_norm2) * 100)
head(df_ntile_norm_meta_egfr)
length(unique(df_ntile_norm_meta_egfr[["PGS"]]))

## reformat df for individual percentile plots

### all

In [None]:
# Reshape to one row per IID with one `ntile_<PGS>` column per score, then
# write the wide table as tab-separated text.
df_ntile_norm_wide <- pivot_wider(
  df_ntile_norm,
  id_cols = "IID",
  names_from = "PGS",
  names_prefix = "ntile_",
  values_from = "ntile"
)
dim(df_ntile_norm_wide)
head(df_ntile_norm_wide)
write.table(
  df_ntile_norm_wide,
  file = "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### all individuals multiancestry scores

In [None]:
# Wide form of the multiancestry percentile table (one row per IID, one
# `ntile_<PGS>` column per score), also saved as tab-separated text.
df_ntile_norm_wide_multiancestry_score <- pivot_wider(
  df_ntile_norm_multiancestry_score,
  id_cols = "IID",
  names_from = "PGS",
  names_prefix = "ntile_",
  values_from = "ntile"
)
dim(df_ntile_norm_wide_multiancestry_score)
head(df_ntile_norm_wide_multiancestry_score)
write.table(
  df_ntile_norm_wide_multiancestry_score,
  file = "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### eur

In [None]:
# Wide form of df_ntile_norm_eur (one row per IID, one `ntile_<PGS>` column
# per score), also saved as tab-separated text.
df_ntile_norm_wide_eur <- pivot_wider(
  df_ntile_norm_eur,
  id_cols = "IID",
  names_from = "PGS",
  names_prefix = "ntile_",
  values_from = "ntile"
)
dim(df_ntile_norm_wide_eur)
head(df_ntile_norm_wide_eur)
write.table(
  df_ntile_norm_wide_eur,
  file = "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### eur individuals multiancestry scores

In [None]:
# Wide form of df_ntile_norm_eur_multiancestry_score (one row per IID, one
# `ntile_<PGS>` column per score), also saved as tab-separated text.
df_ntile_norm_wide_eur_multiancestry_score <- pivot_wider(
  df_ntile_norm_eur_multiancestry_score,
  id_cols = "IID",
  names_from = "PGS",
  names_prefix = "ntile_",
  values_from = "ntile"
)
dim(df_ntile_norm_wide_eur_multiancestry_score)
head(df_ntile_norm_wide_eur_multiancestry_score)
write.table(
  df_ntile_norm_wide_eur_multiancestry_score,
  file = "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### afr

In [None]:
# Wide form of df_ntile_norm_afr (one row per IID, one `ntile_<PGS>` column
# per score), also saved as tab-separated text.
df_ntile_norm_wide_afr <- pivot_wider(
  df_ntile_norm_afr,
  id_cols = "IID",
  names_from = "PGS",
  names_prefix = "ntile_",
  values_from = "ntile"
)
dim(df_ntile_norm_wide_afr)
head(df_ntile_norm_wide_afr)
write.table(
  df_ntile_norm_wide_afr,
  file = "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

### afr individuals multiancestry scores

In [None]:
# Wide form of df_ntile_norm_afr_multiancestry_score (one row per IID, one
# `ntile_<PGS>` column per score), also saved as tab-separated text.
df_ntile_norm_wide_afr_multiancestry_score <- pivot_wider(
  df_ntile_norm_afr_multiancestry_score,
  id_cols = "IID",
  names_from = "PGS",
  names_prefix = "ntile_",
  values_from = "ntile"
)
dim(df_ntile_norm_wide_afr_multiancestry_score)
head(df_ntile_norm_wide_afr_multiancestry_score)
write.table(
  df_ntile_norm_wide_afr_multiancestry_score,
  file = "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

## create score lists function

In [None]:
#' List the models whose rows all satisfy a criterion column
#'
#' @param df Data frame with a `model` column and the column named by
#'   `criteria`.
#' @param criteria Name (string) of the column to test. A model is kept only
#'   when every one of its rows equals 1 in that column.
#' @param ntile If `TRUE`, each returned name is prefixed with "ntile_".
#' @return The kept model names in order of first appearance in `df` (no
#'   sorting is applied), optionally prefixed. Stops with an error when
#'   `criteria` is not a column of `df`.
equiv_scores <- function(df, criteria, ntile = FALSE) {
  if (!criteria %in% names(df)) {
    stop(paste0("Criteria column ", criteria, " not found in dataframe"))
  }

  # Keep only the rows of models for which the flag is 1 on every row.
  passing_rows <- df %>%
    group_by(model) %>%
    filter(all(.data[[criteria]] == 1)) %>%
    ungroup()
  passing_models <- unique(passing_rows[["model"]])

  if (!ntile) {
    return(passing_models)
  }
  paste0("ntile_", passing_models)
}

## make dot plot for 5 individuals PRS percentiles across scores

In [None]:
#' Dot plot of PGS percentiles for a random sample of participants
#'
#' Samples `n_indiv` rows of `df_ntile`, keeps the `ntile_*` columns of the
#' scores returned by `equiv_scores()` for `criteria`, and draws one facet row
#' per sampled participant with one point per score. Sampled IIDs are
#' relabelled "Participant <k>" before plotting.
#'
#' @param df_ntile Wide data frame with an `IID` column and `ntile_<score>`
#'   columns.
#' @param all_metrics_df Metrics table passed to `equiv_scores()`.
#' @param criteria Name of the criterion column used to select scores.
#' @param n_indiv Number of participants to sample. The manual colour scale
#'   below supplies five colours.
#' @param seed Seed given to `set.seed()` before sampling so the same
#'   participants are drawn on every run; `NULL` skips seeding. Note that this
#'   resets the session's RNG state.
#' @return A ggplot object.
plot_indiv_score <- function(df_ntile, all_metrics_df, criteria = "ROPE_02", n_indiv = 5, seed = 7) {
  ntile_list <- equiv_scores(all_metrics_df, criteria, ntile = TRUE)
  model_list <- equiv_scores(all_metrics_df, criteria, ntile = FALSE)

  # The seed argument was previously ignored, so each run drew different people.
  if (!is.null(seed)) {
    set.seed(seed)
  }
  random_ntile <- sample_n(df_ntile, n_indiv) %>%
    select(IID, all_of(ntile_list))

  melt_random_ntile <- reshape2::melt(random_ntile, id = c("IID")) %>%
    mutate(variable = str_replace(variable, "ntile_", ""))

  # Keep the x-axis in the order returned by equiv_scores().
  melt_random_ntile$variable <- factor(melt_random_ntile$variable, levels = model_list)
  # Replace raw IIDs with "Participant <k>" labels.
  melt_random_ntile <- melt_random_ntile %>%
    mutate(IID = factor(paste0("Participant ", as.numeric(factor(IID)))))

  ggplot(data = melt_random_ntile, aes(x = variable, y = value, color = IID, group = IID)) +
    geom_point(size = 3) +
    labs(x = "Score", y = "Percentile", title = "AOU ALL CKD PGS Percentile Variation", color = "ID") +
    facet_wrap(IID ~ ., nrow = n_indiv) +
    scale_color_manual(values = c("#326B8B", "#B4B5B4", "#64A4D6", "#990000", "#011F5B")) +
    scale_y_continuous(expand = expansion(mult = c(0.1, 0.1))) +
    theme(
      axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
      strip.background = element_blank(),
      strip.text.x = element_blank(),
      plot.margin = unit(c(1, 1, 2, 2), "cm")
    )
}


# Build the participant dot plot from the all-participant wide table, show
# it, and save that same plot object.
indiv_dot_plot <- plot_indiv_score(
  df_ntile = df_ntile_norm_wide,
  all_metrics_df = model_metrics_df
)
indiv_dot_plot
ggsave(
  filename = "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.dot_plot.png",
  plot = indiv_dot_plot,
  width = 12,
  height = 7,
  dpi = 300
)

In [None]:
#' Dot plot of PGS percentiles for a random sample of participants (EUR title)
#'
#' Samples `n_indiv` rows of `df_ntile`, keeps the `ntile_*` columns of the
#' scores returned by `equiv_scores()` for `criteria`, and draws one facet row
#' per sampled participant with one point per score. Points are coloured by
#' the sampled `IID` values.
#'
#' @param df_ntile Wide data frame with an `IID` column and `ntile_<score>`
#'   columns.
#' @param all_metrics_df Metrics table passed to `equiv_scores()`.
#' @param criteria Name of the criterion column used to select scores.
#' @param n_indiv Number of participants to sample.
#' @param seed Seed given to `set.seed()` before sampling so the same
#'   participants are drawn on every run; `NULL` skips seeding. Note that this
#'   resets the session's RNG state.
#' @return A ggplot object.
plot_indiv_score <- function(df_ntile, all_metrics_df, criteria = "ROPE_02", n_indiv = 5, seed = 7) {
  ntile_list <- equiv_scores(all_metrics_df, criteria, ntile = TRUE)
  model_list <- equiv_scores(all_metrics_df, criteria, ntile = FALSE)

  # The seed argument was previously ignored, so each run drew different people.
  if (!is.null(seed)) {
    set.seed(seed)
  }
  random_ntile <- sample_n(df_ntile, n_indiv) %>%
    select(IID, all_of(ntile_list))

  melt_random_ntile <- reshape2::melt(random_ntile, id = c("IID")) %>%
    mutate(variable = str_replace(variable, "ntile_", ""))

  # Keep the x-axis in the order returned by equiv_scores().
  melt_random_ntile$variable <- factor(melt_random_ntile$variable, levels = model_list)
  melt_random_ntile$IID <- factor(melt_random_ntile$IID)

  ggplot(data = melt_random_ntile, aes(x = variable, y = value, color = IID, group = IID)) +
    geom_point(size = 3) +
    labs(x = "Score", y = "Percentile", title = "AOU EUR CKD PGS Percentile Variation") +
    facet_wrap(IID ~ ., nrow = n_indiv) +
    scale_color_viridis_d(option = "H") +
    theme(
      axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
      strip.background = element_blank(),
      strip.text.x = element_blank(),
      plot.margin = unit(c(1, 1, 2, 2), "cm")
    )
}


# Build the participant dot plot from the EUR wide table, show it, and save
# that same plot object.
indiv_dot_plot <- plot_indiv_score(
  df_ntile = df_ntile_norm_wide_eur,
  all_metrics_df = model_metrics_df_eur
)
indiv_dot_plot
ggsave(
  filename = "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.dot_plot.png",
  plot = indiv_dot_plot,
  width = 9,
  height = 6,
  dpi = 300
)

In [None]:
#' Dot plot of PGS percentiles for a random sample of participants (AFR title)
#'
#' Samples `n_indiv` rows of `df_ntile`, keeps the `ntile_*` columns of the
#' scores returned by `equiv_scores()` for `criteria`, and draws one facet row
#' per sampled participant with one point per score. Points are coloured by
#' the sampled `IID` values.
#'
#' @param df_ntile Wide data frame with an `IID` column and `ntile_<score>`
#'   columns.
#' @param all_metrics_df Metrics table passed to `equiv_scores()`.
#' @param criteria Name of the criterion column used to select scores.
#' @param n_indiv Number of participants to sample.
#' @param seed Seed given to `set.seed()` before sampling so the same
#'   participants are drawn on every run; `NULL` skips seeding. Note that this
#'   resets the session's RNG state.
#' @return A ggplot object.
plot_indiv_score <- function(df_ntile, all_metrics_df, criteria = "ROPE_02", n_indiv = 5, seed = 7) {
  ntile_list <- equiv_scores(all_metrics_df, criteria, ntile = TRUE)
  model_list <- equiv_scores(all_metrics_df, criteria, ntile = FALSE)

  # The seed argument was previously ignored, so each run drew different people.
  if (!is.null(seed)) {
    set.seed(seed)
  }
  random_ntile <- sample_n(df_ntile, n_indiv) %>%
    select(IID, all_of(ntile_list))

  melt_random_ntile <- reshape2::melt(random_ntile, id = c("IID")) %>%
    mutate(variable = str_replace(variable, "ntile_", ""))

  # Keep the x-axis in the order returned by equiv_scores().
  melt_random_ntile$variable <- factor(melt_random_ntile$variable, levels = model_list)
  melt_random_ntile$IID <- factor(melt_random_ntile$IID)

  ggplot(data = melt_random_ntile, aes(x = variable, y = value, color = IID, group = IID)) +
    geom_point(size = 3) +
    labs(x = "Score", y = "Percentile", title = "AOU AFR CKD PGS Percentile Variation") +
    facet_wrap(IID ~ ., nrow = n_indiv) +
    scale_color_viridis_d(option = "H") +
    theme(
      axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
      strip.background = element_blank(),
      strip.text.x = element_blank(),
      plot.margin = unit(c(1, 1, 2, 2), "cm")
    )
}


# Build the participant dot plot from the AFR wide table, show it, and save
# that same plot object.
indiv_dot_plot <- plot_indiv_score(
  df_ntile = df_ntile_norm_wide_afr,
  all_metrics_df = model_metrics_df_afr
)
indiv_dot_plot
ggsave(
  filename = "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.dot_plot.png",
  plot = indiv_dot_plot,
  width = 9,
  height = 6,
  dpi = 300
)

## reformat df for percentile stats function

In [None]:
# One row per distinct combination of participant ID and covariates.
all_score_pheno_covar <- distinct(
  select(all_score, IID, SEX, AGE, CKD, MostSimilarPop)
)

# Wide table of Z_norm2 values: one row per IID, one column per PGS.
df_score_wide <- pivot_wider(
  df_ntile_norm,
  id_cols = "IID",
  names_from = "PGS",
  values_from = "Z_norm2"
)

# Percentile columns + covariates + score columns, matched on IID.
df_ntile_norm_score_wide_pheno_covar <- inner_join(
  inner_join(df_ntile_norm_wide, all_score_pheno_covar, by = "IID"),
  df_score_wide,
  by = "IID"
)
dim(df_ntile_norm_score_wide_pheno_covar)
head(df_ntile_norm_score_wide_pheno_covar)

## calculate percentile stats across all individuals

In [None]:
#' Long-format table of scores and percentiles for the selected models
#'
#' Keeps IID, AGE, SEX, MostSimilarPop, CKD and the score / `ntile_` columns of
#' the models returned by `equiv_scores()`, adds `CKD_status` (CKD as a
#' factor), and pivots twice: first the score columns (names starting with
#' PGS, EAS, AFR, AMR or EUR) into `PGS_method`/`PGS`, then the `ntile*`
#' columns into `ntile_method`/`ntile`. Because the two pivots are applied one
#' after the other, each input row is expanded to every score column paired
#' with every percentile column.
#'
#' @param df_ntile_norm Wide data frame holding covariates, score columns and
#'   `ntile_<score>` columns.
#' @param all_metrics_df Metrics table passed to `equiv_scores()`.
#' @param criteria Name of the criterion column used to select models.
#' @return The long-format tibble described above.
make_sumscores_pivot <- function(df_ntile_norm, all_metrics_df = model_metrics_df,  criteria = "ROPE_02") {
  ntile_list <- equiv_scores(all_metrics_df, criteria, ntile = TRUE)
  model_list <- equiv_scores(all_metrics_df, criteria, ntile = FALSE)
  pheno <- "CKD_status"

  selected <- select(
    df_ntile_norm,
    IID, AGE, SEX, MostSimilarPop, CKD,
    all_of(ntile_list), all_of(model_list)
  )
  with_status <- mutate(selected, !!pheno := base::as.factor(CKD))

  long_scores <- pivot_longer(
    with_status,
    cols = matches("^(PGS|EAS|AFR|AMR|EUR)"),
    names_to = "PGS_method",
    values_to = "PGS"
  )
  pivot_longer(
    long_scores,
    cols = starts_with("ntile"),
    names_to = "ntile_method",
    values_to = "ntile"
  )
}

sumscores_pivot <- make_sumscores_pivot(df_ntile_norm_score_wide_pheno_covar)

# Per-participant descriptive statistics of the percentile across scores.
# sumscores_pivot pairs every score column with every percentile column, so
# each percentile appears once per score for a given IID. Collapse to one row
# per (IID, ntile_method) first; otherwise the n-based statistics (length, sd,
# se, ci, cv) are computed on duplicated values.
sum_ntile <- sumscores_pivot %>%
  distinct(IID, ntile_method, ntile) %>%
  desc_statby(measure.var = "ntile", grps = "IID")

In [None]:
# Preview the long-format table and report its row count.
head(sumscores_pivot, n = 6L)
nrow(sumscores_pivot)

In [None]:
# Preview the per-participant summary table and report its row count.
head(sum_ntile, n = 6L)
nrow(sum_ntile)

In [None]:
# Save the long-format table as gzip-compressed, tab-separated text.
write.table(
  sumscores_pivot,
  file = gzfile("AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.pivot.txt.gz"),
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Save the per-participant percentile summary as tab-separated text.
write.table(
  sum_ntile,
  file = "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.sumstats.individual_level.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

## make "all individual percentile plots" aggregated by mean, sd, and cv

In [None]:
sum_metrics <- c("mean", "sd", "cv")
metric_labels <- c("mean" = "Mean", "sd" = "Standard Deviation", "cv" = "Coefficient of Variation")
titles <- c("mean" = "Distribution of Mean PGS Percentiles per Individual",
            "sd" = "Distribution of PGS Percentile Standard Deviation per Individual",
            "cv" = "Distribution of PGS Percentile Coefficient of Variation per Individual")

# One density plot per summary metric of sum_ntile.
# aes() stores `.data[[metric]]` unevaluated and only resolves `metric` when
# the plot is drawn. In a top-level for loop every stored plot would therefore
# use the loop variable's final value ("cv"). Building each plot inside a
# function gives every plot its own `metric` binding.
plot_list <- lapply(sum_metrics, function(metric) {
  ggplot(sum_ntile, aes(x = .data[[metric]])) +
    geom_density(alpha = .7, color = "#B24745FF", fill = "#B24745FF") +
    labs(
      title = titles[[metric]],
      x = paste("PGS Percentile", metric_labels[[metric]]),
      y = "Density"
    ) +
    theme(legend.position = "none")
})
plot_list <- setNames(plot_list, sum_metrics)

# Create a named list of plots
named_plot_list <- setNames(plot_list, sum_metrics)
# NOTE: these three names are used by the following cells; as plain objects
# they also mask base::mean / stats::sd when referenced as values.
mean <- named_plot_list[["mean"]]
sd <- named_plot_list[["sd"]]
cv <- named_plot_list[["cv"]]

In [None]:
# Show the density plot stored in `mean` and save that same plot object.
mean
ggsave(
  filename = "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_mean_percentile.density_plot.png",
  plot = mean
)

In [None]:
# Show the density plot stored in `sd` and save that same plot object.
sd
ggsave(
  filename = "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_stdev_percentile.density_plot.png",
  plot = sd
)

In [None]:
# Show the density plot stored in `cv` and save that same plot object.
cv
ggsave(
  filename = "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_coefficient_variation_percentile.density_plot.png",
  plot = cv
)

## compute avg stats across all people and scores

In [None]:
sum_metrics <- c("mean", "sd", "cv")

# For each summary column of sum_ntile: bootstrap its median (R = 1000
# resamples), take the normal-approximation 95% interval from boot.ci(), and
# collect one row per metric. No seed is set here, so the interval bounds vary
# between runs.
avg_stats <- bind_rows(
  lapply(sum_metrics, function(met) {
    boot_med <- simpleboot::one.boot(sum_ntile[[met]], median, R = 1000)
    ci_result <- boot::boot.ci(boot_med, conf = 0.95, type = "norm")

    # `normal` holds (confidence level, lower bound, upper bound).
    normal_ci <- ci_result$normal

    data.frame(
      Metric = met,
      Lower = normal_ci[[2]],
      Upper = normal_ci[[3]],
      median = ci_result$t0
    )
  })
)

write.table(
  avg_stats,
  file = "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.sumstats.all_individuals_scores.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

head(avg_stats)

## make plots showing range of individual-level discordance in percentile classification

### create score list function (if needed)

In [None]:
#' List the models whose rows all satisfy a criterion column
#'
#' Re-declared here so this section can run without the earlier definition.
#'
#' @param df Data frame with a `model` column and the column named by
#'   `criteria`.
#' @param criteria Name (string) of the column to test. A model is kept only
#'   when every one of its rows equals 1 in that column.
#' @param ntile If `TRUE`, each returned name is prefixed with "ntile_".
#' @return The kept model names in order of first appearance in `df` (no
#'   sorting is applied), optionally prefixed. Stops with an error when
#'   `criteria` is not a column of `df`.
equiv_scores <- function(df, criteria, ntile = FALSE) {
  if (!criteria %in% names(df)) {
    stop(paste0("Criteria column ", criteria, " not found in dataframe"))
  }

  # Keep only the rows of models for which the flag is 1 on every row.
  passing_rows <- df %>%
    group_by(model) %>%
    filter(all(.data[[criteria]] == 1)) %>%
    ungroup()
  passing_models <- unique(passing_rows[["model"]])

  if (!ntile) {
    return(passing_models)
  }
  paste0("ntile_", passing_models)
}

### make inputs

In [None]:
metrics <- c("ROPE_02")
ntile_list <- equiv_scores(model_metrics_df, metrics, ntile = TRUE)
model_list <- equiv_scores(model_metrics_df, metrics, ntile = FALSE)

# Per participant: highest and lowest percentile across the selected scores,
# and the gap between them.
ntile_df_plot <- df_ntile_norm_wide %>%
  select(IID, all_of(ntile_list)) %>%
  pivot_longer(cols = -IID) %>%
  mutate(name = str_replace(name, "ntile_", "")) %>%
  filter(name %in% model_list) %>%
  summarise(max_score = max(value), min_score = min(value), .by = IID) %>%
  mutate(score_range = max_score - min_score)

# Label each participant with the most extreme band their max/min falls in;
# case_when() takes the first matching condition.
ntile_plot_groups <- mutate(
  ntile_df_plot,
  risk = forcats::fct_relevel(
    case_when(
      max_score > 98 & min_score < 2 ~ "Above 98% and Below 2%",
      max_score > 95 & min_score < 5 ~ "Above 95% and Below 5%",
      max_score > 80 & min_score < 20 ~ "Above 80% and Below 20%",
      .default = "Intermediate agreement"
    ),
    "Above 98% and Below 2%",
    "Above 95% and Below 5%",
    "Above 80% and Below 20%",
    "Intermediate agreement"
  )
)

# Number of participants whose percentile gap is at least 98 points.
sum(ntile_df_plot$score_range >= 98, na.rm = TRUE)
head(ntile_plot_groups)

In [None]:
metrics <- c("ROPE_02")
ntile_list <- equiv_scores(model_metrics_df_eur, metrics, ntile = TRUE)
model_list <- equiv_scores(model_metrics_df_eur, metrics, ntile = FALSE)

# Per participant in the EUR wide table: highest and lowest percentile across
# the selected scores, and the gap between them.
ntile_df_plot_eur <- df_ntile_norm_wide_eur %>%
  select(IID, all_of(ntile_list)) %>%
  pivot_longer(cols = -IID) %>%
  mutate(name = str_replace(name, "ntile_", "")) %>%
  filter(name %in% model_list) %>%
  summarise(max_score = max(value), min_score = min(value), .by = IID) %>%
  mutate(score_range = max_score - min_score)

# Label each participant with the most extreme band their max/min falls in;
# case_when() takes the first matching condition.
ntile_plot_groups_eur <- mutate(
  ntile_df_plot_eur,
  risk = forcats::fct_relevel(
    case_when(
      max_score > 98 & min_score < 2 ~ "Above 98% and Below 2%",
      max_score > 95 & min_score < 5 ~ "Above 95% and Below 5%",
      max_score > 80 & min_score < 20 ~ "Above 80% and Below 20%",
      .default = "Intermediate agreement"
    ),
    "Above 98% and Below 2%",
    "Above 95% and Below 5%",
    "Above 80% and Below 20%",
    "Intermediate agreement"
  )
)

# Number of participants whose percentile gap is at least 98 points.
sum(ntile_df_plot_eur$score_range >= 98, na.rm = TRUE)
head(ntile_plot_groups_eur)

In [None]:
metrics <- c("ROPE_02")
ntile_list <- equiv_scores(model_metrics_df_afr, metrics, ntile = TRUE)
# Both lists must come from the AFR metrics. The model list was previously
# built from model_metrics_df_eur, which silently restricted the AFR scores to
# those also selected in EUR.
model_list <- equiv_scores(model_metrics_df_afr, metrics, ntile = FALSE)

# Per participant in the AFR wide table: highest and lowest percentile across
# the selected scores, and the gap between them.
ntile_df_plot_afr <- df_ntile_norm_wide_afr %>%
  select(IID, all_of(ntile_list)) %>% # ntile_list: selected score names with the ntile_ prefix
  pivot_longer(cols = -IID) %>%
  mutate(name = str_replace(name, "ntile_", "")) %>%
  filter(name %in% model_list) %>%
  group_by(IID) %>%
  mutate(max_score = max(value), min_score = min(value)) %>%
  ungroup() %>%
  select(IID, max_score, min_score) %>%
  mutate(score_range = max_score - min_score) %>%
  distinct()

# Label each participant with the most extreme band their max/min falls in;
# case_when() takes the first matching condition.
ntile_plot_groups_afr <- ntile_df_plot_afr %>%
  mutate(risk = case_when(
    max_score > 98 & min_score < 2 ~ "Above 98% and Below 2%",
    max_score > 95 & min_score < 5 ~ "Above 95% and Below 5%",
    max_score > 80 & min_score < 20 ~ "Above 80% and Below 20%",
    .default = "Intermediate agreement"
  )) %>%
  mutate(risk = forcats::fct_relevel(
    risk,
    c(
      "Above 98% and Below 2%",
      "Above 95% and Below 5%",
      "Above 80% and Below 20%",
      "Intermediate agreement"
    )
  ))

# Number of participants whose percentile gap is at least 98 points.
ntile_df_plot_afr %>%
  filter(score_range >= 98) %>%
  nrow()
head(ntile_plot_groups_afr)

### make plot

In [None]:
# Scatter of each participant's lowest vs highest percentile, filled by risk
# band, with a dashed identity line and dotted guides at 5/20 (y) and 80/95 (x).
ggplot(ntile_plot_groups, aes(x = max_score, y = min_score)) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_point(aes(fill = risk), size = 2, shape = 21) +
  geom_hline(yintercept = c(5, 20), linetype = "dotted") +
  geom_vline(xintercept = c(80, 95), linetype = "dotted") +
  scale_x_continuous(limits = c(0, 100), labels = scales::percent_format(scale = 1)) +
  scale_y_continuous(limits = c(0, 100), labels = scales::percent_format(scale = 1)) +
  labs(
    title = "AOU ALL CKD PGS Percentile Consistency Across Models",
    x = "Maximum Score Percentile",
    y = "Minimum Score Percentile",
    fill = "Risk Groups"
  ) +
  guides(fill = guide_legend(override.aes = list(alpha = 1, size = 3))) +
  coord_fixed() +
  scale_fill_manual(
    values = c(
      "Above 98% and Below 2%" = "orange",
      "Above 95% and Below 5%" = "darkorchid1",
      "Above 80% and Below 20%" = "skyblue",
      "Intermediate agreement" = "pink"
    )
  )
# No plot argument: ggsave() writes the most recent plot.
ggsave("AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_variation_range.png")

In [None]:
# Scatter of each participant's lowest vs highest percentile, filled by risk
# band, with a dashed identity line and dotted guides at 5/20 (y) and 80/95 (x).
ggplot(ntile_plot_groups_eur, aes(x = max_score, y = min_score)) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_point(aes(fill = risk), size = 2, shape = 21) +
  geom_hline(yintercept = c(5, 20), linetype = "dotted") +
  geom_vline(xintercept = c(80, 95), linetype = "dotted") +
  scale_x_continuous(limits = c(0, 100), labels = scales::percent_format(scale = 1)) +
  scale_y_continuous(limits = c(0, 100), labels = scales::percent_format(scale = 1)) +
  labs(
    title = "AOU EUR CKD PGS Percentile Consistency Across Models",
    x = "Maximum Score Percentile",
    y = "Minimum Score Percentile",
    fill = "Risk Groups"
  ) +
  guides(fill = guide_legend(override.aes = list(alpha = 1, size = 3))) +
  coord_fixed() +
  scale_fill_manual(
    values = c(
      "Above 98% and Below 2%" = "orange",
      "Above 95% and Below 5%" = "darkorchid1",
      "Above 80% and Below 20%" = "skyblue",
      "Intermediate agreement" = "pink"
    )
  )
# No plot argument: ggsave() writes the most recent plot.
ggsave("AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_variation_range.png")

In [None]:
# Scatter of each participant's lowest vs highest percentile, filled by risk
# band, with a dashed identity line and dotted guides at 5/20 (y) and 80/95 (x).
ggplot(ntile_plot_groups_afr, aes(x = max_score, y = min_score)) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_point(aes(fill = risk), size = 2, shape = 21) +
  geom_hline(yintercept = c(5, 20), linetype = "dotted") +
  geom_vline(xintercept = c(80, 95), linetype = "dotted") +
  scale_x_continuous(limits = c(0, 100), labels = scales::percent_format(scale = 1)) +
  scale_y_continuous(limits = c(0, 100), labels = scales::percent_format(scale = 1)) +
  labs(
    title = "AOU AFR CKD PGS Percentile Consistency Across Models",
    x = "Maximum Score Percentile",
    y = "Minimum Score Percentile",
    fill = "Risk Groups"
  ) +
  guides(fill = guide_legend(override.aes = list(alpha = 1, size = 3))) +
  coord_fixed() +
  scale_fill_manual(
    values = c(
      "Above 98% and Below 2%" = "orange",
      "Above 95% and Below 5%" = "darkorchid1",
      "Above 80% and Below 20%" = "skyblue",
      "Intermediate agreement" = "pink"
    )
  )
# No plot argument: ggsave() writes the most recent plot.
ggsave("AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_variation_range.png")

### compute summary stats for these differences

In [None]:
# Counts and percentages of participants per discordance band. `wide` also
# counts everyone in `extreme`; `intermediate` is the complement of `wide`.
risk_counts <- summarise(
  ntile_df_plot,
  total = n(),
  extreme = sum(max_score > 95 & min_score < 5),
  wide = sum(max_score > 80 & min_score < 20),
  intermediate = sum(max_score <= 80 | min_score >= 20)
)
risk_stats <- mutate(
  risk_counts,
  extreme_pct = extreme / total * 100,
  wide_pct = wide / total * 100,
  intermediate_pct = intermediate / total * 100
)

# Legend labels carrying the percentage of participants in each band.
risk_labels <- setNames(
  sprintf(
    c(
      "Above 95%% and Below 5%% (%.1f%% of participants)",
      "Above 80%% and Below 20%% (%.1f%% of participants)",
      "Intermediate agreement (%.1f%% of participants)"
    ),
    c(risk_stats$extreme_pct, risk_stats$wide_pct, risk_stats$intermediate_pct)
  ),
  c("Above 95% and Below 5%", "Above 80% and Below 20%", "Intermediate agreement")
)
head(risk_stats)
write.table(
  risk_stats,
  file = "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_variation_range.sumstats.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [None]:
# Counts and percentages of EUR participants per discordance band. `wide` also
# counts everyone in `extreme`; `intermediate` is the complement of `wide`.
risk_stats_eur <- ntile_df_plot_eur %>%
  summarise(
    total = n(),
    extreme = sum(max_score > 95 & min_score < 5),
    wide = sum(max_score > 80 & min_score < 20),
    intermediate = sum(!(max_score > 80 & min_score < 20))
  ) %>%
  mutate(
    extreme_pct = extreme / total * 100,
    wide_pct = wide / total * 100,
    intermediate_pct = intermediate / total * 100
  )

# EUR legend labels, built from the EUR statistics. This cell used to rebuild
# `risk_labels` from `risk_stats` (the all-participant table), which only
# reproduced the all-participant labels. The EUR labels now get their own
# name so `risk_labels` keeps its all-participant values.
risk_labels_eur <- c(
  "Above 95% and Below 5%" = sprintf("Above 95%% and Below 5%% (%.1f%% of participants)", risk_stats_eur$extreme_pct),
  "Above 80% and Below 20%" = sprintf("Above 80%% and Below 20%% (%.1f%% of participants)", risk_stats_eur$wide_pct),
  "Intermediate agreement" = sprintf("Intermediate agreement (%.1f%% of participants)", risk_stats_eur$intermediate_pct)
)
head(risk_stats_eur)
write.table(
  risk_stats_eur,
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_variation_range.sumstats.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

In [None]:
# Counts and percentages of AFR participants per discordance band. `wide` also
# counts everyone in `extreme`; `intermediate` is the complement of `wide`.
risk_stats_afr <- ntile_df_plot_afr %>%
  summarise(
    total = n(),
    extreme = sum(max_score > 95 & min_score < 5),
    wide = sum(max_score > 80 & min_score < 20),
    intermediate = sum(!(max_score > 80 & min_score < 20))
  ) %>%
  mutate(
    extreme_pct = extreme / total * 100,
    wide_pct = wide / total * 100,
    intermediate_pct = intermediate / total * 100
  )

# AFR legend labels, built from the AFR statistics. This cell used to rebuild
# `risk_labels` from `risk_stats` (the all-participant table), which only
# reproduced the all-participant labels. The AFR labels now get their own
# name so `risk_labels` keeps its all-participant values.
risk_labels_afr <- c(
  "Above 95% and Below 5%" = sprintf("Above 95%% and Below 5%% (%.1f%% of participants)", risk_stats_afr$extreme_pct),
  "Above 80% and Below 20%" = sprintf("Above 80%% and Below 20%% (%.1f%% of participants)", risk_stats_afr$wide_pct),
  "Intermediate agreement" = sprintf("Intermediate agreement (%.1f%% of participants)", risk_stats_afr$intermediate_pct)
)
head(risk_stats_afr)
write.table(
  risk_stats_afr,
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_variation_range.sumstats.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

# identify individuals with 95% confidence

Steps
1. Take the average of the 100 scores per person to get the "posterior mean PGS".
2. Find the PGS value corresponding to the eMERGE percentile risk cutoff in the "posterior mean PGS" across everyone; for T2D this was the top 2% (also the top 2% for CKD).
3. Then, per person, count how many of those 100 scores are above the PGS value from (2). If 95 or more of the scores are above that value, the person is called as having >95% confidence for high risk.

## compute mean PGS

In [None]:
# Per-participant mean of the `ntile` percentile over all rows of
# df_ntile_norm_afr_egfr (stored as MEAN_PGS), then its distribution.
df_ntile_norm_afr_egfr_mean <- summarise(
  group_by(df_ntile_norm_afr_egfr, IID),
  MEAN_PGS = mean(ntile)
)
summary(df_ntile_norm_afr_egfr_mean[["MEAN_PGS"]])

In [None]:
# Per-participant mean of the `ntile` percentile over all rows of
# df_ntile_norm_eur_egfr (stored as MEAN_PGS), then its distribution.
df_ntile_norm_eur_egfr_mean <- summarise(
  group_by(df_ntile_norm_eur_egfr, IID),
  MEAN_PGS = mean(ntile)
)
summary(df_ntile_norm_eur_egfr_mean[["MEAN_PGS"]])

In [None]:
# Per-participant mean of the `ntile` percentile over all rows of
# df_ntile_norm_meta_egfr (stored as MEAN_PGS), then its distribution.
df_ntile_norm_meta_egfr_mean <- summarise(
  group_by(df_ntile_norm_meta_egfr, IID),
  MEAN_PGS = mean(ntile)
)
summary(df_ntile_norm_meta_egfr_mean[["MEAN_PGS"]])

In [None]:
# Per-participant mean of the `ntile` percentile over all rows of
# df_ntile_norm_meta_phe (stored as MEAN_PGS), then its distribution.
# NOTE(review): df_ntile_norm_meta_phe is not created alongside the afr/eur/
# meta eGFR tables in the percentile section above; confirm it exists before
# running this cell.
df_ntile_norm_meta_phe_mean <- summarise(
  group_by(df_ntile_norm_meta_phe, IID),
  MEAN_PGS = mean(ntile)
)
summary(df_ntile_norm_meta_phe_mean[["MEAN_PGS"]])

## get N scores in top 2% per person

In [None]:
# Per participant, the number of rows with a percentile of 98 or higher.
# Participants with no such row do not appear in the result.
df_ntile_norm_afr_egfr_count <- summarise(
  group_by(filter(df_ntile_norm_afr_egfr, ntile >= 98), IID),
  COUNT_PGS = n()
)
summary(df_ntile_norm_afr_egfr_count[["COUNT_PGS"]])

In [None]:
# Per participant, the number of rows with a percentile of 98 or higher.
# Participants with no such row do not appear in the result.
df_ntile_norm_eur_egfr_count <- summarise(
  group_by(filter(df_ntile_norm_eur_egfr, ntile >= 98), IID),
  COUNT_PGS = n()
)
summary(df_ntile_norm_eur_egfr_count[["COUNT_PGS"]])

In [None]:
# Per participant, the number of rows with a percentile of 98 or higher.
# Participants with no such row do not appear in the result.
df_ntile_norm_meta_egfr_count <- summarise(
  group_by(filter(df_ntile_norm_meta_egfr, ntile >= 98), IID),
  COUNT_PGS = n()
)
summary(df_ntile_norm_meta_egfr_count[["COUNT_PGS"]])

## make eur and afr dfs

In [None]:
# Rows of pop_sub whose MostSimilarPop is "AFR", and how many there are.
afr <- filter(pop_sub, MostSimilarPop == "AFR")
nrow(afr)

In [None]:
# Rows of pop_sub whose MostSimilarPop is "EUR", and how many there are.
eur <- filter(pop_sub, MostSimilarPop == "EUR")
nrow(eur)

## see how many people meet confidence thresholding

### afr egfr - all

In [None]:
# Participants with at least 95 top-2% rows in the AFR eGFR counts, then how
# many of them are in the `eur` and `afr` subsets.
# presumably 95 of 100 iterations — TODO confirm the iteration count
df_ntile_norm_afr_egfr_95 <- filter(df_ntile_norm_afr_egfr_count, COUNT_PGS >= 95)
nrow(df_ntile_norm_afr_egfr_95)
sum(df_ntile_norm_afr_egfr_95$IID %in% eur$IID)
sum(df_ntile_norm_afr_egfr_95$IID %in% afr$IID)

In [None]:
# Participants with at least 90 top-2% rows in the AFR eGFR counts, then how
# many of them are in the `eur` and `afr` subsets.
df_ntile_norm_afr_egfr_90 <- filter(df_ntile_norm_afr_egfr_count, COUNT_PGS >= 90)
nrow(df_ntile_norm_afr_egfr_90)
sum(df_ntile_norm_afr_egfr_90$IID %in% eur$IID)
sum(df_ntile_norm_afr_egfr_90$IID %in% afr$IID)

In [None]:
# Participants with at least 80 top-2% rows in the AFR eGFR counts, then how
# many of them are in the `eur` and `afr` subsets.
df_ntile_norm_afr_egfr_80 <- filter(df_ntile_norm_afr_egfr_count, COUNT_PGS >= 80)
nrow(df_ntile_norm_afr_egfr_80)
sum(df_ntile_norm_afr_egfr_80$IID %in% eur$IID)
sum(df_ntile_norm_afr_egfr_80$IID %in% afr$IID)

In [None]:
# Participants with at least 70 top-2% rows in the AFR eGFR counts, then how
# many of them are in the `eur` and `afr` subsets.
df_ntile_norm_afr_egfr_70 <- filter(df_ntile_norm_afr_egfr_count, COUNT_PGS >= 70)
nrow(df_ntile_norm_afr_egfr_70)
sum(df_ntile_norm_afr_egfr_70$IID %in% eur$IID)
sum(df_ntile_norm_afr_egfr_70$IID %in% afr$IID)

In [None]:
# Participants with at least 50 top-2% rows in the AFR eGFR counts, then how
# many of them are in the `eur` and `afr` subsets.
df_ntile_norm_afr_egfr_50 <- filter(df_ntile_norm_afr_egfr_count, COUNT_PGS >= 50)
nrow(df_ntile_norm_afr_egfr_50)
sum(df_ntile_norm_afr_egfr_50$IID %in% eur$IID)
sum(df_ntile_norm_afr_egfr_50$IID %in% afr$IID)

### eur egfr

In [None]:
# Participants with at least 95 top-2% rows in the EUR eGFR counts, then how
# many of them are in the `eur` and `afr` subsets.
# presumably 95 of 100 iterations — TODO confirm the iteration count
df_ntile_norm_eur_egfr_95 <- filter(df_ntile_norm_eur_egfr_count, COUNT_PGS >= 95)
nrow(df_ntile_norm_eur_egfr_95)
sum(df_ntile_norm_eur_egfr_95$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_95$IID %in% afr$IID)

In [None]:
# Participants with at least 90 top-2% rows in the EUR eGFR counts, then how
# many of them are in the `eur` and `afr` subsets.
df_ntile_norm_eur_egfr_90 <- filter(df_ntile_norm_eur_egfr_count, COUNT_PGS >= 90)
nrow(df_ntile_norm_eur_egfr_90)
sum(df_ntile_norm_eur_egfr_90$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_90$IID %in% afr$IID)

In [None]:
# Participants with at least 80 top-2% rows in the EUR eGFR counts, then how
# many of them are in the `eur` and `afr` subsets.
df_ntile_norm_eur_egfr_80 <- filter(df_ntile_norm_eur_egfr_count, COUNT_PGS >= 80)
nrow(df_ntile_norm_eur_egfr_80)
sum(df_ntile_norm_eur_egfr_80$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_80$IID %in% afr$IID)

In [None]:
# Participants with at least 70 top-2% rows in the EUR eGFR counts, then how
# many of them are in the `eur` and `afr` subsets.
df_ntile_norm_eur_egfr_70 <- filter(df_ntile_norm_eur_egfr_count, COUNT_PGS >= 70)
nrow(df_ntile_norm_eur_egfr_70)
sum(df_ntile_norm_eur_egfr_70$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_70$IID %in% afr$IID)

In [None]:
# Participants with at least 50 top-2% rows in the EUR eGFR counts, then how
# many of them are in the `eur` and `afr` subsets.
df_ntile_norm_eur_egfr_50 <- filter(df_ntile_norm_eur_egfr_count, COUNT_PGS >= 50)
nrow(df_ntile_norm_eur_egfr_50)
sum(df_ntile_norm_eur_egfr_50$IID %in% eur$IID)
sum(df_ntile_norm_eur_egfr_50$IID %in% afr$IID)

### meta egfr

In [None]:
# Participants whose meta eGFR top-2% count reaches 95% of 400, then how many
# of them are in the `eur` and `afr` subsets.
# presumably 400 is the number of meta iterations — TODO confirm
df_ntile_norm_meta_egfr_95 <- filter(df_ntile_norm_meta_egfr_count, COUNT_PGS >= (0.95 * 400))
nrow(df_ntile_norm_meta_egfr_95)
sum(df_ntile_norm_meta_egfr_95$IID %in% eur$IID)
sum(df_ntile_norm_meta_egfr_95$IID %in% afr$IID)

In [None]:
df_ntile_norm_meta_egfr_90 = df_ntile_norm_meta_egfr_count%>%
filter(COUNT_PGS >= (0.90 * 400))
nrow(df_ntile_norm_meta_egfr_90)
nrow(df_ntile_norm_meta_egfr_90[df_ntile_norm_meta_egfr_90$IID %in% eur$IID,])
nrow(df_ntile_norm_meta_egfr_90[df_ntile_norm_meta_egfr_90$IID %in% afr$IID,])

In [None]:
df_ntile_norm_meta_egfr_80 = df_ntile_norm_meta_egfr_count%>%
filter(COUNT_PGS >= (0.80 * 400))
nrow(df_ntile_norm_meta_egfr_80)
nrow(df_ntile_norm_meta_egfr_80[df_ntile_norm_meta_egfr_80$IID %in% eur$IID,])
nrow(df_ntile_norm_meta_egfr_80[df_ntile_norm_meta_egfr_80$IID %in% afr$IID,])

In [None]:
df_ntile_norm_meta_egfr_70 = df_ntile_norm_meta_egfr_count%>%
filter(COUNT_PGS >= (0.70 * 400))
nrow(df_ntile_norm_meta_egfr_70)
nrow(df_ntile_norm_meta_egfr_70[df_ntile_norm_meta_egfr_70$IID %in% eur$IID,])
nrow(df_ntile_norm_meta_egfr_70[df_ntile_norm_meta_egfr_70$IID %in% afr$IID,])

In [None]:
df_ntile_norm_meta_egfr_50 = df_ntile_norm_meta_egfr_count%>%
filter(COUNT_PGS >= (0.50 * 400))
nrow(df_ntile_norm_meta_egfr_50)
nrow(df_ntile_norm_meta_egfr_50[df_ntile_norm_meta_egfr_50$IID %in% eur$IID,])
nrow(df_ntile_norm_meta_egfr_50[df_ntile_norm_meta_egfr_50$IID %in% afr$IID,])

# calculate stability

## read in input files

### all

In [None]:
# Tab-separated table with a header row; read.delim() applies the same
# defaults as read.csv(..., sep = '\t').
model_metrics_df <- read.delim(
  file = 'AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)

### all individuals multiancestry scores

In [None]:
# Same layout, multiancestry-score metrics file for the ALL group.
model_metrics_df_multiancestry_score <- read.delim(
  file = 'AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt'
)

### eur

In [None]:
# Same layout, EUR metrics file.
model_metrics_df_eur <- read.delim(
  file = 'AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)

### eur individuals multiancestry scores

In [None]:
# Same layout, EUR multiancestry-score metrics file.
model_metrics_df_eur_multiancestry_score <- read.delim(
  file = 'AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt'
)

### afr

In [None]:
# Same layout, AFR metrics file.
model_metrics_df_afr <- read.delim(
  file = 'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)

### afr individuals multiancestry scores

In [None]:
# Same layout, AFR multiancestry-score metrics file.
model_metrics_df_afr_multiancestry_score <- read.delim(
  file = 'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt'
)

## create function (if needed)

In [None]:
# Select the models whose rows ALL satisfy a 0/1 criterion column.
#
# A model is kept only when every one of its rows in `df` has
# `df[[criteria]] == 1`. A model with any other value, or a missing value,
# in that column is excluded.
#
# Arguments:
#   df        data frame with a `model` column and the `criteria` column
#   criteria  name (single string) of the column to test
#   ntile     if TRUE, return the model names prefixed with "ntile_"
#
# Returns the unique qualifying values of `df$model` in order of first
# appearance (they are NOT sorted). With `ntile = TRUE` the result is a
# character vector of "ntile_<model>" names. The result is empty when no
# model qualifies.
#
# Stops with an error when `criteria` or `model` is not a column of `df`.
equiv_scores <- function(df, criteria, ntile = FALSE) {
    if (!criteria %in% names(df)) {
        stop(paste0("Criteria column ", criteria, " not found in dataframe"))
    }
    if (!"model" %in% names(df)) {
        stop("Column model not found in dataframe")
    }

    model_ids <- df[["model"]]
    meets <- df[[criteria]] == 1

    # A single FALSE or NA row disqualifies the whole model.
    rejected <- unique(model_ids[is.na(meets) | !meets])
    candidates <- unique(model_ids)
    models <- candidates[!(candidates %in% rejected)]

    if (!ntile) {
        return(models)
    }

    # paste0("ntile_", <empty>) would yield the single bogus name "ntile_",
    # so an empty selection is returned as an empty character vector instead.
    if (length(models) == 0) {
        return(character(0))
    }
    paste0("ntile_", models)
}

## calculate number of 98th+ percentile classifications per each score and individual

### all

In [None]:
# Scores selected by equiv_scores() on the ROPE_02 column of model_metrics_df.
ntile_list <- equiv_scores(model_metrics_df, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per individual: number of selected scores with ntile >= 98 (COUNT_PGS) and
# that number as a percentage of all selected scores (STABILITY).
# Individuals without any such score do not appear in the result.
n_class <- df_ntile_norm %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = (COUNT_PGS / n_scores) * 100)

head(n_class)
summary(n_class$COUNT_PGS)
summary(n_class$STABILITY)
n_distinct(n_class$IID)

### all individuals multiancestry scores

In [None]:
# Scores selected by equiv_scores() on the ROPE_02 column of
# model_metrics_df_multiancestry_score.
ntile_list <- equiv_scores(model_metrics_df_multiancestry_score, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per individual: number of selected scores with ntile >= 98 (COUNT_PGS) and
# that number as a percentage of all selected scores (STABILITY).
n_class_multiancestry_score <- df_ntile_norm_multiancestry_score %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = (COUNT_PGS / n_scores) * 100)

head(n_class_multiancestry_score)
summary(n_class_multiancestry_score$COUNT_PGS)
summary(n_class_multiancestry_score$STABILITY)
n_distinct(n_class_multiancestry_score$IID)

### eur

In [None]:
# Scores selected by equiv_scores() on the ROPE_02 column of model_metrics_df.
# NOTE(review): the score set comes from model_metrics_df (all individuals),
# not model_metrics_df_eur - confirm this is intended.
ntile_list <- equiv_scores(model_metrics_df, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per individual in df_ntile_norm_eur: number of selected scores with
# ntile >= 98 (COUNT_PGS) and its percentage of all selected scores (STABILITY).
n_class_eur <- df_ntile_norm_eur %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = (COUNT_PGS / n_scores) * 100)

head(n_class_eur)
summary(n_class_eur$COUNT_PGS)
summary(n_class_eur$STABILITY)
n_distinct(n_class_eur$IID)

### eur individuals multiancestry scores

In [None]:
# Scores selected by equiv_scores() on the ROPE_02 column of
# model_metrics_df_multiancestry_score.
# NOTE(review): uses the all-individuals metrics table, not
# model_metrics_df_eur_multiancestry_score - confirm this is intended.
ntile_list <- equiv_scores(model_metrics_df_multiancestry_score, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per individual in df_ntile_norm_eur_multiancestry_score: number of selected
# scores with ntile >= 98 (COUNT_PGS) and its percentage of all selected
# scores (STABILITY).
n_class_eur_multiancestry_score <- df_ntile_norm_eur_multiancestry_score %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = (COUNT_PGS / n_scores) * 100)

head(n_class_eur_multiancestry_score)
summary(n_class_eur_multiancestry_score$COUNT_PGS)
summary(n_class_eur_multiancestry_score$STABILITY)
n_distinct(n_class_eur_multiancestry_score$IID)

### afr

In [None]:
# Scores selected by equiv_scores() on the ROPE_02 column of model_metrics_df.
# NOTE(review): the score set comes from model_metrics_df (all individuals),
# not model_metrics_df_afr - confirm this is intended.
ntile_list <- equiv_scores(model_metrics_df, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per individual in df_ntile_norm_afr: number of selected scores with
# ntile >= 98 (COUNT_PGS) and its percentage of all selected scores (STABILITY).
n_class_afr <- df_ntile_norm_afr %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = (COUNT_PGS / n_scores) * 100)

head(n_class_afr)
summary(n_class_afr$COUNT_PGS)
summary(n_class_afr$STABILITY)
n_distinct(n_class_afr$IID)

### afr individuals multiancestry scores

In [None]:
# Scores selected by equiv_scores() on the ROPE_02 column of
# model_metrics_df_multiancestry_score.
# NOTE(review): uses the all-individuals metrics table, not
# model_metrics_df_afr_multiancestry_score - confirm this is intended.
ntile_list <- equiv_scores(model_metrics_df_multiancestry_score, "ROPE_02", ntile = FALSE)
n_scores <- length(ntile_list)
print(n_scores)

# Per individual in df_ntile_norm_afr_multiancestry_score: number of selected
# scores with ntile >= 98 (COUNT_PGS) and its percentage of all selected
# scores (STABILITY).
n_class_afr_multiancestry_score <- df_ntile_norm_afr_multiancestry_score %>%
  filter(PGS %in% ntile_list, ntile >= 98) %>%
  group_by(IID) %>%
  summarise(COUNT_PGS = n_distinct(PGS)) %>%
  mutate(STABILITY = (COUNT_PGS / n_scores) * 100)

head(n_class_afr_multiancestry_score)
summary(n_class_afr_multiancestry_score$COUNT_PGS)
summary(n_class_afr_multiancestry_score$STABILITY)
n_distinct(n_class_afr_multiancestry_score$IID)

## filter to high confidence individuals

### all

#### eur egfr

In [None]:
# Rows of n_class whose IID is in the EUR eGFR >= 95 subset, followed by the
# COUNT_PGS / STABILITY summaries and the number of distinct individuals.
n_class_eur_egfr_95 <- n_class %>%
  filter(IID %in% df_ntile_norm_eur_egfr_95$IID)

summary(n_class_eur_egfr_95$COUNT_PGS)
summary(n_class_eur_egfr_95$STABILITY)
n_distinct(n_class_eur_egfr_95$IID)

In [None]:
# IID in the EUR eGFR >= 90 subset.
n_class_eur_egfr_90 <- n_class %>%
  filter(IID %in% df_ntile_norm_eur_egfr_90$IID)

summary(n_class_eur_egfr_90$COUNT_PGS)
summary(n_class_eur_egfr_90$STABILITY)
n_distinct(n_class_eur_egfr_90$IID)

In [None]:
# IID in the EUR eGFR >= 80 subset.
n_class_eur_egfr_80 <- n_class %>%
  filter(IID %in% df_ntile_norm_eur_egfr_80$IID)

summary(n_class_eur_egfr_80$COUNT_PGS)
summary(n_class_eur_egfr_80$STABILITY)
n_distinct(n_class_eur_egfr_80$IID)

In [None]:
# IID in the EUR eGFR >= 70 subset.
n_class_eur_egfr_70 <- n_class %>%
  filter(IID %in% df_ntile_norm_eur_egfr_70$IID)

summary(n_class_eur_egfr_70$COUNT_PGS)
summary(n_class_eur_egfr_70$STABILITY)
n_distinct(n_class_eur_egfr_70$IID)

In [None]:
# IID in the EUR eGFR >= 50 subset.
n_class_eur_egfr_50 <- n_class %>%
  filter(IID %in% df_ntile_norm_eur_egfr_50$IID)

summary(n_class_eur_egfr_50$COUNT_PGS)
summary(n_class_eur_egfr_50$STABILITY)
n_distinct(n_class_eur_egfr_50$IID)

#### afr egfr

In [None]:
# Rows of n_class whose IID is in the AFR eGFR >= 90 subset, followed by the
# COUNT_PGS / STABILITY summaries and the number of distinct individuals.
n_class_afr_egfr_90 <- n_class %>%
  filter(IID %in% df_ntile_norm_afr_egfr_90$IID)

summary(n_class_afr_egfr_90$COUNT_PGS)
summary(n_class_afr_egfr_90$STABILITY)
n_distinct(n_class_afr_egfr_90$IID)

In [None]:
# IID in the AFR eGFR >= 80 subset.
n_class_afr_egfr_80 <- n_class %>%
  filter(IID %in% df_ntile_norm_afr_egfr_80$IID)

summary(n_class_afr_egfr_80$COUNT_PGS)
summary(n_class_afr_egfr_80$STABILITY)
n_distinct(n_class_afr_egfr_80$IID)

In [None]:
# IID in the AFR eGFR >= 70 subset.
n_class_afr_egfr_70 <- n_class %>%
  filter(IID %in% df_ntile_norm_afr_egfr_70$IID)

summary(n_class_afr_egfr_70$COUNT_PGS)
summary(n_class_afr_egfr_70$STABILITY)
n_distinct(n_class_afr_egfr_70$IID)

In [None]:
# IID in the AFR eGFR >= 50 subset.
n_class_afr_egfr_50 <- n_class %>%
  filter(IID %in% df_ntile_norm_afr_egfr_50$IID)

summary(n_class_afr_egfr_50$COUNT_PGS)
summary(n_class_afr_egfr_50$STABILITY)
n_distinct(n_class_afr_egfr_50$IID)

#### meta egfr

In [None]:
# Rows of n_class whose IID is in the META eGFR 95% subset, followed by the
# COUNT_PGS / STABILITY summaries and the number of distinct individuals.
n_class_meta_egfr_95 <- n_class %>%
  filter(IID %in% df_ntile_norm_meta_egfr_95$IID)

summary(n_class_meta_egfr_95$COUNT_PGS)
summary(n_class_meta_egfr_95$STABILITY)
n_distinct(n_class_meta_egfr_95$IID)

In [None]:
# IID in the META eGFR 90% subset.
n_class_meta_egfr_90 <- n_class %>%
  filter(IID %in% df_ntile_norm_meta_egfr_90$IID)

summary(n_class_meta_egfr_90$COUNT_PGS)
summary(n_class_meta_egfr_90$STABILITY)
n_distinct(n_class_meta_egfr_90$IID)

In [None]:
# IID in the META eGFR 80% subset.
n_class_meta_egfr_80 <- n_class %>%
  filter(IID %in% df_ntile_norm_meta_egfr_80$IID)

summary(n_class_meta_egfr_80$COUNT_PGS)
summary(n_class_meta_egfr_80$STABILITY)
n_distinct(n_class_meta_egfr_80$IID)

In [None]:
# IID in the META eGFR 70% subset.
n_class_meta_egfr_70 <- n_class %>%
  filter(IID %in% df_ntile_norm_meta_egfr_70$IID)

summary(n_class_meta_egfr_70$COUNT_PGS)
summary(n_class_meta_egfr_70$STABILITY)
n_distinct(n_class_meta_egfr_70$IID)

In [None]:
# IID in the META eGFR 50% subset.
n_class_meta_egfr_50 <- n_class %>%
  filter(IID %in% df_ntile_norm_meta_egfr_50$IID)

summary(n_class_meta_egfr_50$COUNT_PGS)
summary(n_class_meta_egfr_50$STABILITY)
n_distinct(n_class_meta_egfr_50$IID)

### all individuals multiancestry scores

#### eur egfr

In [None]:
# Rows of n_class_multiancestry_score whose IID is in the EUR eGFR >= 95
# subset, followed by the COUNT_PGS / STABILITY summaries and the number of
# distinct individuals.
n_class_multiancestry_score_eur_egfr_95 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_95$IID)

summary(n_class_multiancestry_score_eur_egfr_95$COUNT_PGS)
summary(n_class_multiancestry_score_eur_egfr_95$STABILITY)
n_distinct(n_class_multiancestry_score_eur_egfr_95$IID)

In [None]:
# IID in the EUR eGFR >= 90 subset.
n_class_multiancestry_score_eur_egfr_90 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_90$IID)

summary(n_class_multiancestry_score_eur_egfr_90$COUNT_PGS)
summary(n_class_multiancestry_score_eur_egfr_90$STABILITY)
n_distinct(n_class_multiancestry_score_eur_egfr_90$IID)

In [None]:
# IID in the EUR eGFR >= 80 subset.
n_class_multiancestry_score_eur_egfr_80 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_80$IID)

summary(n_class_multiancestry_score_eur_egfr_80$COUNT_PGS)
summary(n_class_multiancestry_score_eur_egfr_80$STABILITY)
n_distinct(n_class_multiancestry_score_eur_egfr_80$IID)

In [None]:
# IID in the EUR eGFR >= 70 subset.
n_class_multiancestry_score_eur_egfr_70 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_70$IID)

summary(n_class_multiancestry_score_eur_egfr_70$COUNT_PGS)
summary(n_class_multiancestry_score_eur_egfr_70$STABILITY)
n_distinct(n_class_multiancestry_score_eur_egfr_70$IID)

In [None]:
# IID in the EUR eGFR >= 50 subset.
n_class_multiancestry_score_eur_egfr_50 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_50$IID)

summary(n_class_multiancestry_score_eur_egfr_50$COUNT_PGS)
summary(n_class_multiancestry_score_eur_egfr_50$STABILITY)
n_distinct(n_class_multiancestry_score_eur_egfr_50$IID)

#### afr egfr

In [None]:
# Rows of n_class_multiancestry_score whose IID is in the AFR eGFR >= 90
# subset, followed by the COUNT_PGS / STABILITY summaries and the number of
# distinct individuals.
n_class_multiancestry_score_afr_egfr_90 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_afr_egfr_90$IID)

summary(n_class_multiancestry_score_afr_egfr_90$COUNT_PGS)
summary(n_class_multiancestry_score_afr_egfr_90$STABILITY)
n_distinct(n_class_multiancestry_score_afr_egfr_90$IID)

In [None]:
# IID in the AFR eGFR >= 80 subset.
n_class_multiancestry_score_afr_egfr_80 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_afr_egfr_80$IID)

summary(n_class_multiancestry_score_afr_egfr_80$COUNT_PGS)
summary(n_class_multiancestry_score_afr_egfr_80$STABILITY)
n_distinct(n_class_multiancestry_score_afr_egfr_80$IID)

In [None]:
# IID in the AFR eGFR >= 70 subset.
n_class_multiancestry_score_afr_egfr_70 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_afr_egfr_70$IID)

summary(n_class_multiancestry_score_afr_egfr_70$COUNT_PGS)
summary(n_class_multiancestry_score_afr_egfr_70$STABILITY)
n_distinct(n_class_multiancestry_score_afr_egfr_70$IID)

In [None]:
# IID in the AFR eGFR >= 50 subset.
n_class_multiancestry_score_afr_egfr_50 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_afr_egfr_50$IID)

summary(n_class_multiancestry_score_afr_egfr_50$COUNT_PGS)
summary(n_class_multiancestry_score_afr_egfr_50$STABILITY)
n_distinct(n_class_multiancestry_score_afr_egfr_50$IID)

#### meta egfr

In [None]:
# Rows of n_class_multiancestry_score whose IID is in the META eGFR 95%
# subset, followed by the COUNT_PGS / STABILITY summaries and the number of
# distinct individuals.
n_class_multiancestry_score_meta_egfr_95 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_95$IID)

summary(n_class_multiancestry_score_meta_egfr_95$COUNT_PGS)
summary(n_class_multiancestry_score_meta_egfr_95$STABILITY)
n_distinct(n_class_multiancestry_score_meta_egfr_95$IID)

In [None]:
# IID in the META eGFR 90% subset.
n_class_multiancestry_score_meta_egfr_90 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_90$IID)

summary(n_class_multiancestry_score_meta_egfr_90$COUNT_PGS)
summary(n_class_multiancestry_score_meta_egfr_90$STABILITY)
n_distinct(n_class_multiancestry_score_meta_egfr_90$IID)

In [None]:
# IID in the META eGFR 80% subset.
n_class_multiancestry_score_meta_egfr_80 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_80$IID)

summary(n_class_multiancestry_score_meta_egfr_80$COUNT_PGS)
summary(n_class_multiancestry_score_meta_egfr_80$STABILITY)
n_distinct(n_class_multiancestry_score_meta_egfr_80$IID)

In [None]:
# IID in the META eGFR 70% subset.
n_class_multiancestry_score_meta_egfr_70 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_70$IID)

summary(n_class_multiancestry_score_meta_egfr_70$COUNT_PGS)
summary(n_class_multiancestry_score_meta_egfr_70$STABILITY)
n_distinct(n_class_multiancestry_score_meta_egfr_70$IID)

In [None]:
# IID in the META eGFR 50% subset.
n_class_multiancestry_score_meta_egfr_50 <- n_class_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_50$IID)

summary(n_class_multiancestry_score_meta_egfr_50$COUNT_PGS)
summary(n_class_multiancestry_score_meta_egfr_50$STABILITY)
n_distinct(n_class_multiancestry_score_meta_egfr_50$IID)

### eur

#### eur egfr

In [None]:
# Rows of n_class_eur whose IID is in the EUR eGFR >= 95 subset, followed by
# the COUNT_PGS / STABILITY summaries and the number of distinct individuals.
n_class_eur_indiv_eur_egfr_95 <- n_class_eur %>%
  filter(IID %in% df_ntile_norm_eur_egfr_95$IID)

summary(n_class_eur_indiv_eur_egfr_95$COUNT_PGS)
summary(n_class_eur_indiv_eur_egfr_95$STABILITY)
n_distinct(n_class_eur_indiv_eur_egfr_95$IID)

In [None]:
# IID in the EUR eGFR >= 90 subset.
n_class_eur_indiv_eur_egfr_90 <- n_class_eur %>%
  filter(IID %in% df_ntile_norm_eur_egfr_90$IID)

summary(n_class_eur_indiv_eur_egfr_90$COUNT_PGS)
summary(n_class_eur_indiv_eur_egfr_90$STABILITY)
n_distinct(n_class_eur_indiv_eur_egfr_90$IID)

In [None]:
# IID in the EUR eGFR >= 80 subset.
n_class_eur_indiv_eur_egfr_80 <- n_class_eur %>%
  filter(IID %in% df_ntile_norm_eur_egfr_80$IID)

summary(n_class_eur_indiv_eur_egfr_80$COUNT_PGS)
summary(n_class_eur_indiv_eur_egfr_80$STABILITY)
n_distinct(n_class_eur_indiv_eur_egfr_80$IID)

In [None]:
# IID in the EUR eGFR >= 70 subset.
n_class_eur_indiv_eur_egfr_70 <- n_class_eur %>%
  filter(IID %in% df_ntile_norm_eur_egfr_70$IID)

summary(n_class_eur_indiv_eur_egfr_70$COUNT_PGS)
summary(n_class_eur_indiv_eur_egfr_70$STABILITY)
n_distinct(n_class_eur_indiv_eur_egfr_70$IID)

In [None]:
# IID in the EUR eGFR >= 50 subset.
n_class_eur_indiv_eur_egfr_50 <- n_class_eur %>%
  filter(IID %in% df_ntile_norm_eur_egfr_50$IID)

summary(n_class_eur_indiv_eur_egfr_50$COUNT_PGS)
summary(n_class_eur_indiv_eur_egfr_50$STABILITY)
n_distinct(n_class_eur_indiv_eur_egfr_50$IID)

#### meta egfr

In [None]:
# Rows of n_class_eur whose IID is in the META eGFR 95% subset, followed by
# the COUNT_PGS / STABILITY summaries and the number of distinct individuals.
n_class_eur_indiv_meta_egfr_95 <- n_class_eur %>%
  filter(IID %in% df_ntile_norm_meta_egfr_95$IID)

summary(n_class_eur_indiv_meta_egfr_95$COUNT_PGS)
summary(n_class_eur_indiv_meta_egfr_95$STABILITY)
n_distinct(n_class_eur_indiv_meta_egfr_95$IID)

In [None]:
# IID in the META eGFR 90% subset.
n_class_eur_indiv_meta_egfr_90 <- n_class_eur %>%
  filter(IID %in% df_ntile_norm_meta_egfr_90$IID)

summary(n_class_eur_indiv_meta_egfr_90$COUNT_PGS)
summary(n_class_eur_indiv_meta_egfr_90$STABILITY)
n_distinct(n_class_eur_indiv_meta_egfr_90$IID)

In [None]:
# IID in the META eGFR 80% subset.
n_class_eur_indiv_meta_egfr_80 <- n_class_eur %>%
  filter(IID %in% df_ntile_norm_meta_egfr_80$IID)

summary(n_class_eur_indiv_meta_egfr_80$COUNT_PGS)
summary(n_class_eur_indiv_meta_egfr_80$STABILITY)
n_distinct(n_class_eur_indiv_meta_egfr_80$IID)

In [None]:
# IID in the META eGFR 70% subset.
n_class_eur_indiv_meta_egfr_70 <- n_class_eur %>%
  filter(IID %in% df_ntile_norm_meta_egfr_70$IID)

summary(n_class_eur_indiv_meta_egfr_70$COUNT_PGS)
summary(n_class_eur_indiv_meta_egfr_70$STABILITY)
n_distinct(n_class_eur_indiv_meta_egfr_70$IID)

In [None]:
# Rows of n_class_eur whose IID is in the META eGFR 50% subset, followed by
# the COUNT_PGS / STABILITY summaries and the number of distinct individuals.
n_class_eur_indiv_meta_egfr_50 <- n_class_eur[n_class_eur$IID %in% df_ntile_norm_meta_egfr_50$IID, ]

summary(n_class_eur_indiv_meta_egfr_50$COUNT_PGS)
summary(n_class_eur_indiv_meta_egfr_50$STABILITY)
# Count the 50% subset itself; this line previously reported the 70% subset.
length(unique(n_class_eur_indiv_meta_egfr_50$IID))

### eur individuals multiancestry scores

#### eur egfr

In [None]:
# Rows of n_class_eur_multiancestry_score whose IID is in the EUR eGFR >= 95
# subset, followed by the COUNT_PGS / STABILITY summaries and the number of
# distinct individuals.
n_class_eur_indiv_multiancestry_score_eur_egfr_95 <- n_class_eur_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_95$IID)

summary(n_class_eur_indiv_multiancestry_score_eur_egfr_95$COUNT_PGS)
summary(n_class_eur_indiv_multiancestry_score_eur_egfr_95$STABILITY)
n_distinct(n_class_eur_indiv_multiancestry_score_eur_egfr_95$IID)

In [None]:
# IID in the EUR eGFR >= 90 subset.
n_class_eur_indiv_multiancestry_score_eur_egfr_90 <- n_class_eur_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_90$IID)

summary(n_class_eur_indiv_multiancestry_score_eur_egfr_90$COUNT_PGS)
summary(n_class_eur_indiv_multiancestry_score_eur_egfr_90$STABILITY)
n_distinct(n_class_eur_indiv_multiancestry_score_eur_egfr_90$IID)

In [None]:
# IID in the EUR eGFR >= 80 subset.
n_class_eur_indiv_multiancestry_score_eur_egfr_80 <- n_class_eur_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_80$IID)

summary(n_class_eur_indiv_multiancestry_score_eur_egfr_80$COUNT_PGS)
summary(n_class_eur_indiv_multiancestry_score_eur_egfr_80$STABILITY)
n_distinct(n_class_eur_indiv_multiancestry_score_eur_egfr_80$IID)

In [None]:
# IID in the EUR eGFR >= 70 subset.
n_class_eur_indiv_multiancestry_score_eur_egfr_70 <- n_class_eur_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_70$IID)

summary(n_class_eur_indiv_multiancestry_score_eur_egfr_70$COUNT_PGS)
summary(n_class_eur_indiv_multiancestry_score_eur_egfr_70$STABILITY)
n_distinct(n_class_eur_indiv_multiancestry_score_eur_egfr_70$IID)

In [None]:
# IID in the EUR eGFR >= 50 subset.
n_class_eur_indiv_multiancestry_score_eur_egfr_50 <- n_class_eur_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_eur_egfr_50$IID)

summary(n_class_eur_indiv_multiancestry_score_eur_egfr_50$COUNT_PGS)
summary(n_class_eur_indiv_multiancestry_score_eur_egfr_50$STABILITY)
n_distinct(n_class_eur_indiv_multiancestry_score_eur_egfr_50$IID)

#### meta egfr

In [None]:
# Rows of n_class_eur_multiancestry_score whose IID is in the META eGFR 95%
# subset, followed by the COUNT_PGS / STABILITY summaries and the number of
# distinct individuals.
n_class_eur_indiv_multiancestry_score_meta_egfr_95 <- n_class_eur_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_95$IID)

summary(n_class_eur_indiv_multiancestry_score_meta_egfr_95$COUNT_PGS)
summary(n_class_eur_indiv_multiancestry_score_meta_egfr_95$STABILITY)
n_distinct(n_class_eur_indiv_multiancestry_score_meta_egfr_95$IID)

In [None]:
# IID in the META eGFR 90% subset.
n_class_eur_indiv_multiancestry_score_meta_egfr_90 <- n_class_eur_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_90$IID)

summary(n_class_eur_indiv_multiancestry_score_meta_egfr_90$COUNT_PGS)
summary(n_class_eur_indiv_multiancestry_score_meta_egfr_90$STABILITY)
n_distinct(n_class_eur_indiv_multiancestry_score_meta_egfr_90$IID)

In [None]:
# IID in the META eGFR 80% subset.
n_class_eur_indiv_multiancestry_score_meta_egfr_80 <- n_class_eur_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_80$IID)

summary(n_class_eur_indiv_multiancestry_score_meta_egfr_80$COUNT_PGS)
summary(n_class_eur_indiv_multiancestry_score_meta_egfr_80$STABILITY)
n_distinct(n_class_eur_indiv_multiancestry_score_meta_egfr_80$IID)

In [None]:
# IID in the META eGFR 70% subset.
n_class_eur_indiv_multiancestry_score_meta_egfr_70 <- n_class_eur_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_70$IID)

summary(n_class_eur_indiv_multiancestry_score_meta_egfr_70$COUNT_PGS)
summary(n_class_eur_indiv_multiancestry_score_meta_egfr_70$STABILITY)
n_distinct(n_class_eur_indiv_multiancestry_score_meta_egfr_70$IID)

In [None]:
# IID in the META eGFR 50% subset.
n_class_eur_indiv_multiancestry_score_meta_egfr_50 <- n_class_eur_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_50$IID)

summary(n_class_eur_indiv_multiancestry_score_meta_egfr_50$COUNT_PGS)
summary(n_class_eur_indiv_multiancestry_score_meta_egfr_50$STABILITY)
n_distinct(n_class_eur_indiv_multiancestry_score_meta_egfr_50$IID)

### afr

#### afr egfr

In [None]:
# Rows of n_class_afr whose IID is in the AFR eGFR >= 90 subset, followed by
# the COUNT_PGS / STABILITY summaries and the number of distinct individuals.
n_class_afr_indiv_afr_egfr_90 <- n_class_afr %>%
  filter(IID %in% df_ntile_norm_afr_egfr_90$IID)

summary(n_class_afr_indiv_afr_egfr_90$COUNT_PGS)
summary(n_class_afr_indiv_afr_egfr_90$STABILITY)
n_distinct(n_class_afr_indiv_afr_egfr_90$IID)

In [None]:
# IID in the AFR eGFR >= 80 subset.
n_class_afr_indiv_afr_egfr_80 <- n_class_afr %>%
  filter(IID %in% df_ntile_norm_afr_egfr_80$IID)

summary(n_class_afr_indiv_afr_egfr_80$COUNT_PGS)
summary(n_class_afr_indiv_afr_egfr_80$STABILITY)
n_distinct(n_class_afr_indiv_afr_egfr_80$IID)

In [None]:
# IID in the AFR eGFR >= 70 subset.
n_class_afr_indiv_afr_egfr_70 <- n_class_afr %>%
  filter(IID %in% df_ntile_norm_afr_egfr_70$IID)

summary(n_class_afr_indiv_afr_egfr_70$COUNT_PGS)
summary(n_class_afr_indiv_afr_egfr_70$STABILITY)
n_distinct(n_class_afr_indiv_afr_egfr_70$IID)

In [None]:
# IID in the AFR eGFR >= 50 subset.
n_class_afr_indiv_afr_egfr_50 <- n_class_afr %>%
  filter(IID %in% df_ntile_norm_afr_egfr_50$IID)

summary(n_class_afr_indiv_afr_egfr_50$COUNT_PGS)
summary(n_class_afr_indiv_afr_egfr_50$STABILITY)
n_distinct(n_class_afr_indiv_afr_egfr_50$IID)

#### meta egfr

In [None]:
# Rows of n_class_afr whose IID is in the META eGFR 90% subset, followed by
# the COUNT_PGS / STABILITY summaries and the number of distinct individuals.
n_class_afr_indiv_meta_egfr_90 <- n_class_afr %>%
  filter(IID %in% df_ntile_norm_meta_egfr_90$IID)

summary(n_class_afr_indiv_meta_egfr_90$COUNT_PGS)
summary(n_class_afr_indiv_meta_egfr_90$STABILITY)
n_distinct(n_class_afr_indiv_meta_egfr_90$IID)

In [None]:
# IID in the META eGFR 80% subset.
n_class_afr_indiv_meta_egfr_80 <- n_class_afr %>%
  filter(IID %in% df_ntile_norm_meta_egfr_80$IID)

summary(n_class_afr_indiv_meta_egfr_80$COUNT_PGS)
summary(n_class_afr_indiv_meta_egfr_80$STABILITY)
n_distinct(n_class_afr_indiv_meta_egfr_80$IID)

In [None]:
# IID in the META eGFR 70% subset.
n_class_afr_indiv_meta_egfr_70 <- n_class_afr %>%
  filter(IID %in% df_ntile_norm_meta_egfr_70$IID)

summary(n_class_afr_indiv_meta_egfr_70$COUNT_PGS)
summary(n_class_afr_indiv_meta_egfr_70$STABILITY)
n_distinct(n_class_afr_indiv_meta_egfr_70$IID)

In [None]:
# IID in the META eGFR 50% subset.
n_class_afr_indiv_meta_egfr_50 <- n_class_afr %>%
  filter(IID %in% df_ntile_norm_meta_egfr_50$IID)

summary(n_class_afr_indiv_meta_egfr_50$COUNT_PGS)
summary(n_class_afr_indiv_meta_egfr_50$STABILITY)
n_distinct(n_class_afr_indiv_meta_egfr_50$IID)

### afr individuals multiancestry scores

#### afr egfr

In [None]:
# Rows of n_class_afr_multiancestry_score whose IID is in the AFR eGFR >= 90
# subset, followed by the COUNT_PGS / STABILITY summaries and the number of
# distinct individuals.
n_class_afr_indiv_multiancestry_score_afr_egfr_90 <- n_class_afr_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_afr_egfr_90$IID)

summary(n_class_afr_indiv_multiancestry_score_afr_egfr_90$COUNT_PGS)
summary(n_class_afr_indiv_multiancestry_score_afr_egfr_90$STABILITY)
n_distinct(n_class_afr_indiv_multiancestry_score_afr_egfr_90$IID)

In [None]:
# IID in the AFR eGFR >= 80 subset.
n_class_afr_indiv_multiancestry_score_afr_egfr_80 <- n_class_afr_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_afr_egfr_80$IID)

summary(n_class_afr_indiv_multiancestry_score_afr_egfr_80$COUNT_PGS)
summary(n_class_afr_indiv_multiancestry_score_afr_egfr_80$STABILITY)
n_distinct(n_class_afr_indiv_multiancestry_score_afr_egfr_80$IID)

In [None]:
# IID in the AFR eGFR >= 70 subset.
n_class_afr_indiv_multiancestry_score_afr_egfr_70 <- n_class_afr_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_afr_egfr_70$IID)

summary(n_class_afr_indiv_multiancestry_score_afr_egfr_70$COUNT_PGS)
summary(n_class_afr_indiv_multiancestry_score_afr_egfr_70$STABILITY)
n_distinct(n_class_afr_indiv_multiancestry_score_afr_egfr_70$IID)

In [None]:
# IID in the AFR eGFR >= 50 subset.
n_class_afr_indiv_multiancestry_score_afr_egfr_50 <- n_class_afr_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_afr_egfr_50$IID)

summary(n_class_afr_indiv_multiancestry_score_afr_egfr_50$COUNT_PGS)
summary(n_class_afr_indiv_multiancestry_score_afr_egfr_50$STABILITY)
n_distinct(n_class_afr_indiv_multiancestry_score_afr_egfr_50$IID)

#### meta egfr

In [None]:
# Rows of n_class_afr_multiancestry_score whose IID is in the META eGFR 90%
# subset, followed by the COUNT_PGS / STABILITY summaries and the number of
# distinct individuals.
n_class_afr_indiv_multiancestry_score_meta_egfr_90 <- n_class_afr_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_90$IID)

summary(n_class_afr_indiv_multiancestry_score_meta_egfr_90$COUNT_PGS)
summary(n_class_afr_indiv_multiancestry_score_meta_egfr_90$STABILITY)
n_distinct(n_class_afr_indiv_multiancestry_score_meta_egfr_90$IID)

In [None]:
# IID in the META eGFR 80% subset.
n_class_afr_indiv_multiancestry_score_meta_egfr_80 <- n_class_afr_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_80$IID)

summary(n_class_afr_indiv_multiancestry_score_meta_egfr_80$COUNT_PGS)
summary(n_class_afr_indiv_multiancestry_score_meta_egfr_80$STABILITY)
n_distinct(n_class_afr_indiv_multiancestry_score_meta_egfr_80$IID)

In [None]:
# IID in the META eGFR 70% subset.
n_class_afr_indiv_multiancestry_score_meta_egfr_70 <- n_class_afr_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_70$IID)

summary(n_class_afr_indiv_multiancestry_score_meta_egfr_70$COUNT_PGS)
summary(n_class_afr_indiv_multiancestry_score_meta_egfr_70$STABILITY)
n_distinct(n_class_afr_indiv_multiancestry_score_meta_egfr_70$IID)

In [None]:
# IID in the META eGFR 50% subset.
n_class_afr_indiv_multiancestry_score_meta_egfr_50 <- n_class_afr_multiancestry_score %>%
  filter(IID %in% df_ntile_norm_meta_egfr_50$IID)

summary(n_class_afr_indiv_multiancestry_score_meta_egfr_50$COUNT_PGS)
summary(n_class_afr_indiv_multiancestry_score_meta_egfr_50$STABILITY)
n_distinct(n_class_afr_indiv_multiancestry_score_meta_egfr_50$IID)

## make plot inputs

### all

#### eur egfr

In [None]:
# Long-format STABILITY table for the EUR eGFR confidence tiers.
# Starts from every individual in n_class (column 'ALL') and adds one
# STABILITY column per high-confidence subset. Each subset is a row filter of
# n_class, so individuals outside a subset get NA in that subset's column.
stability_plot_input_eur_egfr <- n_class %>%
  select(IID, ALL = STABILITY) %>%
  full_join(select(n_class_eur_egfr_95, IID, `95%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_eur_egfr_90, IID, `90%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_eur_egfr_80, IID, `80%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_eur_egfr_70, IID, `70%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_eur_egfr_50, IID, `50%_CONF` = STABILITY), by = 'IID') %>%
  # every column except IID is a tier column
  pivot_longer(cols = -IID, names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() has no `levels` argument and
  # stops with "unused argument"
  mutate(
    CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF')),
    TRAINING_GROUP = 'EUR_eGFR_PRScs'
  )
head(stability_plot_input_eur_egfr)

#### afr egfr

In [None]:
# Long-format STABILITY table for the AFR eGFR confidence tiers (no 95% tier).
# Starts from every individual in n_class (column 'ALL') and adds one
# STABILITY column per high-confidence subset. Each subset is a row filter of
# n_class, so individuals outside a subset get NA in that subset's column.
stability_plot_input_afr_egfr <- n_class %>%
  select(IID, ALL = STABILITY) %>%
  full_join(select(n_class_afr_egfr_90, IID, `90%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_afr_egfr_80, IID, `80%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_afr_egfr_70, IID, `70%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_afr_egfr_50, IID, `50%_CONF` = STABILITY), by = 'IID') %>%
  # every column except IID is a tier column
  pivot_longer(cols = -IID, names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() has no `levels` argument and
  # stops with "unused argument"
  mutate(
    CONF_THRES = factor(CONF_THRES, levels = c('ALL', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF')),
    TRAINING_GROUP = 'AFR_eGFR_PRScs'
  )
head(stability_plot_input_afr_egfr)

#### meta egfr

In [None]:
# Long-format STABILITY table for the META eGFR confidence tiers.
# Starts from every individual in n_class (column 'ALL') and adds one
# STABILITY column per high-confidence subset. Each subset is a row filter of
# n_class, so individuals outside a subset get NA in that subset's column.
stability_plot_input_meta_egfr <- n_class %>%
  select(IID, ALL = STABILITY) %>%
  full_join(select(n_class_meta_egfr_95, IID, `95%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_meta_egfr_90, IID, `90%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_meta_egfr_80, IID, `80%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_meta_egfr_70, IID, `70%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_meta_egfr_50, IID, `50%_CONF` = STABILITY), by = 'IID') %>%
  # every column except IID is a tier column
  pivot_longer(cols = -IID, names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() has no `levels` argument and
  # stops with "unused argument"
  mutate(
    CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF')),
    TRAINING_GROUP = 'META_eGFR_PRScsx'
  )
head(stability_plot_input_meta_egfr)

#### combined rows

In [None]:
# Stack the EUR, AFR and META eGFR plot inputs (in that order) into one table.
stability_plot_input_comb <- bind_rows(
  stability_plot_input_eur_egfr,
  stability_plot_input_afr_egfr,
  stability_plot_input_meta_egfr
)
head(stability_plot_input_comb)

### all individuals multiancestry scores

#### eur egfr

In [None]:
# Long-format STABILITY table (multiancestry scores) for the EUR eGFR tiers.
# Starts from every individual in n_class_multiancestry_score (column 'ALL')
# and adds one STABILITY column per high-confidence subset. Each subset is a
# row filter of that table, so individuals outside a subset get NA there.
stability_plot_input_multiancestry_score_eur_egfr <- n_class_multiancestry_score %>%
  select(IID, ALL = STABILITY) %>%
  full_join(select(n_class_multiancestry_score_eur_egfr_95, IID, `95%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_multiancestry_score_eur_egfr_90, IID, `90%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_multiancestry_score_eur_egfr_80, IID, `80%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_multiancestry_score_eur_egfr_70, IID, `70%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_multiancestry_score_eur_egfr_50, IID, `50%_CONF` = STABILITY), by = 'IID') %>%
  # every column except IID is a tier column
  pivot_longer(cols = -IID, names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() has no `levels` argument and
  # stops with "unused argument"
  mutate(
    CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF')),
    TRAINING_GROUP = 'EUR_eGFR_PRScs'
  )
head(stability_plot_input_multiancestry_score_eur_egfr)

#### afr egfr

In [None]:
# Long-format STABILITY table (multiancestry scores) for the AFR eGFR tiers
# (no 95% tier).
# Starts from every individual in n_class_multiancestry_score (column 'ALL')
# and adds one STABILITY column per high-confidence subset. Each subset is a
# row filter of that table, so individuals outside a subset get NA there.
stability_plot_input_multiancestry_score_afr_egfr <- n_class_multiancestry_score %>%
  select(IID, ALL = STABILITY) %>%
  full_join(select(n_class_multiancestry_score_afr_egfr_90, IID, `90%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_multiancestry_score_afr_egfr_80, IID, `80%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_multiancestry_score_afr_egfr_70, IID, `70%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_multiancestry_score_afr_egfr_50, IID, `50%_CONF` = STABILITY), by = 'IID') %>%
  # every column except IID is a tier column
  pivot_longer(cols = -IID, names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() has no `levels` argument and
  # stops with "unused argument"
  mutate(
    CONF_THRES = factor(CONF_THRES, levels = c('ALL', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF')),
    TRAINING_GROUP = 'AFR_eGFR_PRScs'
  )
head(stability_plot_input_multiancestry_score_afr_egfr)

#### meta egfr

In [None]:
# Long-format STABILITY table (multiancestry scores) for the META eGFR tiers.
# Starts from every individual in n_class_multiancestry_score (column 'ALL')
# and adds one STABILITY column per high-confidence subset. Each subset is a
# row filter of that table, so individuals outside a subset get NA there.
stability_plot_input_multiancestry_score_meta_egfr <- n_class_multiancestry_score %>%
  select(IID, ALL = STABILITY) %>%
  full_join(select(n_class_multiancestry_score_meta_egfr_95, IID, `95%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_multiancestry_score_meta_egfr_90, IID, `90%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_multiancestry_score_meta_egfr_80, IID, `80%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_multiancestry_score_meta_egfr_70, IID, `70%_CONF` = STABILITY), by = 'IID') %>%
  full_join(select(n_class_multiancestry_score_meta_egfr_50, IID, `50%_CONF` = STABILITY), by = 'IID') %>%
  # every column except IID is a tier column
  pivot_longer(cols = -IID, names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() has no `levels` argument and
  # stops with "unused argument"
  mutate(
    CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF')),
    TRAINING_GROUP = 'META_eGFR_PRScsx'
  )
head(stability_plot_input_multiancestry_score_meta_egfr)

#### combined

In [None]:
# Stack the per-training-group stability tables (EUR, AFR, META) row-wise.
stability_plot_input_multiancestry_score_comb <- bind_rows(
  stability_plot_input_multiancestry_score_eur_egfr,
  stability_plot_input_multiancestry_score_afr_egfr,
  stability_plot_input_multiancestry_score_meta_egfr
)
head(stability_plot_input_multiancestry_score_comb)

### eur

#### eur egfr

In [None]:
# Long-format stability table for the 'EUR_eGFR_PRScs' training group, built
# from n_class_eur plus one table per confidence threshold. After each join
# COUNT_PGS is dropped and STABILITY renamed to the threshold label so the
# next join does not produce clashing column names.
stability_plot_input_eur_indiv_eur_egfr <- n_class_eur %>%
  select(-c(COUNT_PGS)) %>%
  rename('ALL' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_eur_egfr_95, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_eur_egfr_90, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_eur_egfr_80, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_eur_egfr_70, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_eur_egfr_50, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() accepts only `x`, so passing
  # `levels` fails with an "unused argument" error.
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'EUR_eGFR_PRScs')
head(stability_plot_input_eur_indiv_eur_egfr)

#### meta egfr

In [None]:
# Long-format stability table for the 'META_eGFR_PRScsx' training group, built
# from n_class_eur plus one table per confidence threshold. After each join
# COUNT_PGS is dropped and STABILITY renamed to the threshold label so the
# next join does not produce clashing column names.
stability_plot_input_eur_indiv_meta_egfr <- n_class_eur %>%
  select(-c(COUNT_PGS)) %>%
  rename('ALL' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_meta_egfr_95, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_meta_egfr_90, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_meta_egfr_80, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_meta_egfr_70, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_meta_egfr_50, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() accepts only `x`, so passing
  # `levels` fails with an "unused argument" error.
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META_eGFR_PRScsx')
head(stability_plot_input_eur_indiv_meta_egfr)

#### combined

In [None]:
# Stack the EUR and META training-group stability tables row-wise.
stability_plot_input_eur_indiv_comb <- bind_rows(
  stability_plot_input_eur_indiv_eur_egfr,
  stability_plot_input_eur_indiv_meta_egfr
)
head(stability_plot_input_eur_indiv_comb)

### eur individuals multiancestry scores

#### eur egfr

In [None]:
# Long-format stability table for the 'EUR_eGFR_PRScs' training group, built
# from n_class_eur_multiancestry_score plus one table per confidence
# threshold. After each join COUNT_PGS is dropped and STABILITY renamed to the
# threshold label so the next join does not produce clashing column names.
stability_plot_input_eur_indiv_multiancestry_score_eur_egfr <- n_class_eur_multiancestry_score %>%
  select(-c(COUNT_PGS)) %>%
  rename('ALL' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_eur_egfr_95, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_eur_egfr_90, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_eur_egfr_80, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_eur_egfr_70, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_eur_egfr_50, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() accepts only `x`, so passing
  # `levels` fails with an "unused argument" error.
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'EUR_eGFR_PRScs')
head(stability_plot_input_eur_indiv_multiancestry_score_eur_egfr)

#### meta egfr

In [None]:
# Long-format stability table for the 'META_eGFR_PRScsx' training group, built
# from n_class_eur_multiancestry_score plus one table per confidence
# threshold. After each join COUNT_PGS is dropped and STABILITY renamed to the
# threshold label so the next join does not produce clashing column names.
stability_plot_input_eur_indiv_multiancestry_score_meta_egfr <- n_class_eur_multiancestry_score %>%
  select(-c(COUNT_PGS)) %>%
  rename('ALL' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_meta_egfr_95, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('95%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_meta_egfr_90, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_meta_egfr_80, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_meta_egfr_70, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_eur_indiv_multiancestry_score_meta_egfr_50, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() accepts only `x`, so passing
  # `levels` fails with an "unused argument" error.
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '95%_CONF', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META_eGFR_PRScsx')
head(stability_plot_input_eur_indiv_multiancestry_score_meta_egfr)

#### combined

In [None]:
# Stack the EUR and META training-group stability tables row-wise.
stability_plot_input_eur_indiv_multiancestry_score_comb <- bind_rows(
  stability_plot_input_eur_indiv_multiancestry_score_eur_egfr,
  stability_plot_input_eur_indiv_multiancestry_score_meta_egfr
)
head(stability_plot_input_eur_indiv_multiancestry_score_comb)

### afr

#### afr egfr

In [None]:
# Long-format stability table for the 'AFR_eGFR_PRScs' training group, built
# from n_class_afr plus the 80/70/50% confidence-threshold tables. After each
# join COUNT_PGS is dropped and STABILITY renamed to the threshold label so
# the next join does not produce clashing column names.
stability_plot_input_afr_indiv_afr_egfr <- n_class_afr %>%
  select(-c(COUNT_PGS)) %>%
  rename('ALL' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_afr_egfr_80, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_afr_egfr_70, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_afr_egfr_50, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() accepts only `x`, so passing
  # `levels` fails with an "unused argument" error.
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'AFR_eGFR_PRScs')
head(stability_plot_input_afr_indiv_afr_egfr)

#### meta egfr

In [None]:
# Long-format stability table for the 'META_eGFR_PRScsx' training group, built
# from n_class_afr plus the 90/80/70/50% confidence-threshold tables. After
# each join COUNT_PGS is dropped and STABILITY renamed to the threshold label
# so the next join does not produce clashing column names.
stability_plot_input_afr_indiv_meta_egfr <- n_class_afr %>%
  select(-c(COUNT_PGS)) %>%
  rename('ALL' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_meta_egfr_90, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_meta_egfr_80, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_meta_egfr_70, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_meta_egfr_50, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() accepts only `x`, so passing
  # `levels` fails with an "unused argument" error.
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META_eGFR_PRScsx')
head(stability_plot_input_afr_indiv_meta_egfr)

#### combined

In [None]:
# Stack the AFR and META training-group stability tables row-wise.
stability_plot_input_afr_indiv_comb <- bind_rows(
  stability_plot_input_afr_indiv_afr_egfr,
  stability_plot_input_afr_indiv_meta_egfr
)
head(stability_plot_input_afr_indiv_comb)

### afr individuals multiancestry scores

#### afr egfr

In [None]:
# Long-format stability table for the 'AFR_eGFR_PRScs' training group, built
# from n_class_afr_multiancestry_score plus the 80/70/50% confidence-threshold
# tables. After each join COUNT_PGS is dropped and STABILITY renamed to the
# threshold label so the next join does not produce clashing column names.
stability_plot_input_afr_indiv_multiancestry_score_afr_egfr <- n_class_afr_multiancestry_score %>%
  select(-c(COUNT_PGS)) %>%
  rename('ALL' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_multiancestry_score_afr_egfr_80, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_multiancestry_score_afr_egfr_70, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_multiancestry_score_afr_egfr_50, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() accepts only `x`, so passing
  # `levels` fails with an "unused argument" error.
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'AFR_eGFR_PRScs')
head(stability_plot_input_afr_indiv_multiancestry_score_afr_egfr)

#### meta egfr

In [None]:
# Long-format stability table for the 'META_eGFR_PRScsx' training group, built
# from n_class_afr_multiancestry_score plus the 90/80/70/50% confidence-
# threshold tables. After each join COUNT_PGS is dropped and STABILITY renamed
# to the threshold label so the next join does not produce clashing names.
stability_plot_input_afr_indiv_multiancestry_score_meta_egfr <- n_class_afr_multiancestry_score %>%
  select(-c(COUNT_PGS)) %>%
  rename('ALL' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_multiancestry_score_meta_egfr_90, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('90%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_multiancestry_score_meta_egfr_80, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('80%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_multiancestry_score_meta_egfr_70, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('70%_CONF' = 'STABILITY') %>%
  full_join(n_class_afr_indiv_multiancestry_score_meta_egfr_50, by = 'IID') %>%
  select(-c(COUNT_PGS)) %>%
  rename('50%_CONF' = 'STABILITY') %>%
  pivot_longer(cols = c('ALL', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'), names_to = "CONF_THRES", values_to = "STABILITY") %>%
  # factor(), not as.factor(): as.factor() accepts only `x`, so passing
  # `levels` fails with an "unused argument" error.
  mutate(CONF_THRES = factor(CONF_THRES, levels = c('ALL', '90%_CONF', '80%_CONF', '70%_CONF', '50%_CONF'))) %>%
  mutate(TRAINING_GROUP = 'META_eGFR_PRScsx')
head(stability_plot_input_afr_indiv_multiancestry_score_meta_egfr)

#### combined

In [None]:
# Stack the AFR and META training-group stability tables row-wise.
stability_plot_input_afr_indiv_multiancestry_score_comb <- bind_rows(
  stability_plot_input_afr_indiv_multiancestry_score_afr_egfr,
  stability_plot_input_afr_indiv_multiancestry_score_meta_egfr
)
head(stability_plot_input_afr_indiv_multiancestry_score_comb)

## make plots

### all

In [None]:
# Box plots of STABILITY per confidence threshold, filled by training group.
# The x axis follows the reversed factor-level order of CONF_THRES.
ggplot(
  data = stability_plot_input_comb,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  labs(
    title = "AOU ALL CKD Confidence Thresholding",
    x = "Confidence Threshold",
    y = "Stability",
    fill = "Training Group"
  ) +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_comb$CONF_THRES)))
  )
# No plot argument: ggsave() writes the most recently created plot.
ggsave(filename = "AOU.ALL.CKD.YK_Phenotyping.confidence_thresholding.png")

### all individuals multiancestry scores

In [None]:
# Box plots of STABILITY per confidence threshold, filled by training group.
# The x axis follows the reversed factor-level order of CONF_THRES.
ggplot(
  data = stability_plot_input_multiancestry_score_comb,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  labs(
    title = "AOU ALL CKD Confidence Thresholding",
    x = "Confidence Threshold",
    y = "Stability",
    fill = "Training Group"
  ) +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_multiancestry_score_comb$CONF_THRES)))
  )
# No plot argument: ggsave() writes the most recently created plot.
ggsave(filename = "AOU.ALL.CKD.YK_Phenotyping.multiancestry_scores.confidence_thresholding.png")

### eur

In [None]:
# Box plots of STABILITY per confidence threshold, filled by training group.
# The x axis follows the reversed factor-level order of CONF_THRES.
ggplot(
  data = stability_plot_input_eur_indiv_comb,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  labs(
    title = "AOU EUR CKD Confidence Thresholding",
    x = "Confidence Threshold",
    y = "Stability",
    fill = "Training Group"
  ) +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_eur_indiv_comb$CONF_THRES)))
  )
# No plot argument: ggsave() writes the most recently created plot.
ggsave(filename = "AOU.EUR.CKD.YK_Phenotyping.confidence_thresholding.png")

### eur individuals multiancestry scores

In [None]:
# Box plots of STABILITY per confidence threshold, filled by training group.
# The x axis follows the reversed factor-level order of CONF_THRES.
ggplot(
  data = stability_plot_input_eur_indiv_multiancestry_score_comb,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  labs(
    title = "AOU EUR CKD Confidence Thresholding",
    x = "Confidence Threshold",
    y = "Stability",
    fill = "Training Group"
  ) +
  scale_x_discrete(
    limits = rev(levels(factor(stability_plot_input_eur_indiv_multiancestry_score_comb$CONF_THRES)))
  )
# No plot argument: ggsave() writes the most recently created plot.
ggsave(filename = "AOU.EUR.CKD.YK_Phenotyping.multiancestry_scores.confidence_thresholding.png")

### afr

In [None]:
# Box plots of STABILITY per confidence threshold, filled by training group.
# The x-axis order is given explicitly instead of being taken from the
# factor levels of CONF_THRES.
ggplot(
  data = stability_plot_input_afr_indiv_comb,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  labs(
    title = "AOU AFR CKD Confidence Thresholding",
    x = "Confidence Threshold",
    y = "Stability",
    fill = "Training Group"
  ) +
  scale_x_discrete(
    limits = c("ALL", "90%_CONF", "80%_CONF", "70%_CONF", "50%_CONF")
  )
# No plot argument: ggsave() writes the most recently created plot.
ggsave(filename = "AOU.AFR.CKD.YK_Phenotyping.confidence_thresholding.png")

### afr individuals multiancestry scores

In [None]:
# Box plots of STABILITY per confidence threshold, filled by training group.
# The x-axis order is given explicitly instead of being taken from the
# factor levels of CONF_THRES.
ggplot(
  data = stability_plot_input_afr_indiv_multiancestry_score_comb,
  mapping = aes(x = CONF_THRES, y = STABILITY, fill = TRAINING_GROUP)
) +
  geom_boxplot() +
  labs(
    title = "AOU AFR CKD Confidence Thresholding",
    x = "Confidence Threshold",
    y = "Stability",
    fill = "Training Group"
  ) +
  scale_x_discrete(
    limits = c("ALL", "90%_CONF", "80%_CONF", "70%_CONF", "50%_CONF")
  )
# No plot argument: ggsave() writes the most recently created plot.
ggsave(filename = "AOU.AFR.CKD.YK_Phenotyping.multiancestry_scores.confidence_thresholding.png")

# compute ICC

## create functions

In [None]:
# List the models whose rows all satisfy an equivalence criterion.
#
# Args:
#   df:       data frame with a `model` column and a column named by
#             `criteria`.
#   criteria: name (string) of the indicator column to test against 1.
#   ntile:    if TRUE, return the names prefixed with "ntile_" instead of the
#             bare model names.
#
# Returns:
#   Vector of unique qualifying model names (not sorted), or their
#   "ntile_"-prefixed form when `ntile = TRUE`. With `ntile = TRUE` and no
#   qualifying model the result is a zero-length character vector.
#
# Raises:
#   Error if `criteria` is not a column of `df`.
equiv_scores <- function(df, criteria, ntile = FALSE) {
  # Ensure criteria is a column in df
  if (!criteria %in% names(df)) {
    stop(paste0("Criteria column ", criteria, " not found in dataframe"))
  }

  # Keep a model only when every one of its rows has the criterion equal to 1;
  # a group whose test evaluates to NA is dropped by filter().
  models <- df %>%
    group_by(model) %>%
    filter(all(.data[[criteria]] == 1)) %>%
    ungroup() %>%
    pull(model) %>%
    unique()

  if (!ntile) {
    return(models)
  }

  # paste0("ntile_", <zero-length>) would yield the bogus name "ntile_", so
  # return an empty vector explicitly when nothing qualifies.
  if (length(models) == 0) {
    return(character(0))
  }

  paste0("ntile_", models)
}

In [None]:
# Compute the ICC across the ntile columns of the models passing `criteria`.
#
# Args:
#   df_ntile_norm:    data frame holding one "ntile_<model>" column per model.
#   model_metrics_df: per-model metrics table passed to equiv_scores().
#   criteria:         name of the indicator column used to pick the models.
#
# Returns:
#   One-row tibble with columns icc, ubound, lbound, Raters and Criteria.
ICC_equiv <- function(df_ntile_norm,  model_metrics_df, criteria) {
  rater_cols <- equiv_scores(model_metrics_df, criteria, ntile = TRUE)

  # Each selected column is treated as one rater by icc().
  ratings <- select(df_ntile_norm, all_of(rater_cols))
  icc_fit <- icc(ratings, model = "twoway", type = "agreement")

  tibble(
    icc = icc_fit$value,
    ubound = icc_fit$ubound,
    lbound = icc_fit$lbound,
    Raters = icc_fit$raters,
    Criteria = criteria
  )
}

## read in input files (if needed)

### all

In [None]:
# Tab-separated wide-form table of individual percentiles (ALL).
df_ntile_norm_wide <- fread(
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt",
  sep = "\t"
)

In [None]:
# Tab-separated model performance metrics (ALL).
model_metrics_df <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t"
)

### all individuals multiancestry scores

In [None]:
# Tab-separated wide-form table of individual percentiles
# (ALL, multi-ancestry scores).
df_ntile_norm_wide_multiancestry_score <- fread(
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt",
  sep = "\t"
)

In [None]:
# Tab-separated model performance metrics (ALL, multi-ancestry scores).
model_metrics_df_multiancestry_score <- read.csv(
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t"
)

### eur

In [None]:
# Tab-separated wide-form table of individual percentiles (EUR).
df_ntile_norm_wide_eur <- fread(
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt",
  sep = "\t"
)

In [None]:
# Tab-separated model performance metrics (EUR).
model_metrics_df_eur <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t"
)

### eur individuals multiancestry scores

In [None]:
# Tab-separated wide-form table of individual percentiles
# (EUR, multi-ancestry scores).
df_ntile_norm_wide_eur_multiancestry_score <- fread(
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt",
  sep = "\t"
)

In [None]:
# Tab-separated model performance metrics (EUR, multi-ancestry scores).
model_metrics_df_eur_multiancestry_score <- read.csv(
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t"
)

### afr

In [None]:
# Tab-separated wide-form table of individual percentiles (AFR).
df_ntile_norm_wide_afr <- fread(
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt",
  sep = "\t"
)

In [None]:
# Tab-separated model performance metrics (AFR).
model_metrics_df_afr <- read.csv(
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t"
)

### afr individuals multiancestry scores

In [None]:
# Tab-separated wide-form table of individual percentiles
# (AFR, multi-ancestry scores).
df_ntile_norm_wide_afr_multiancestry_score <- fread(
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt",
  sep = "\t"
)

In [None]:
# Tab-separated model performance metrics (AFR, multi-ancestry scores).
model_metrics_df_afr_multiancestry_score <- read.csv(
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt",
  sep = "\t"
)

## make dfs with comorbidities cases/controls

### include

In [None]:
# Unique IIDs of rows in all_score with T2D == 1.
t2d_case <- distinct(filter(all_score, T2D == 1), IID)
nrow(t2d_case)

In [None]:
# Unique IIDs of rows in all_score with T2D == 0.
t2d_control <- distinct(filter(all_score, T2D == 0), IID)
nrow(t2d_control)

In [None]:
# Unique IIDs of rows in all_score with HT_YK == 1.
ht_yk_case <- distinct(filter(all_score, HT_YK == 1), IID)
nrow(ht_yk_case)

In [None]:
# Unique IIDs of rows in all_score with HT_YK == 0.
ht_yk_control <- distinct(filter(all_score, HT_YK == 0), IID)
nrow(ht_yk_control)

In [None]:
# Unique IIDs of rows in all_score with AGE below 60.
age_less_60 <- distinct(filter(all_score, AGE < 60), IID)
nrow(age_less_60)

In [None]:
# Unique IIDs of rows in all_score with AGE of 60 or more.
age_greater_equal_60 <- distinct(filter(all_score, AGE >= 60), IID)
nrow(age_greater_equal_60)

## all individuals

### no stratification

In [None]:
# ICC per equivalence criterion: ALL individuals, no stratification.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

original_df <- df_ntile_norm_wide
model_metrics <- model_metrics_df

df <- original_df

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res <- outcome

### stratified by clinical covariates

#### T2D

In [None]:
# ICC per equivalence criterion: ALL individuals restricted to t2d_case.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- t2d_case
original_df <- df_ntile_norm_wide
model_metrics <- model_metrics_df

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_t2d_case <- outcome

In [None]:
# ICC per equivalence criterion: ALL individuals restricted to t2d_control.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- t2d_control
original_df <- df_ntile_norm_wide
model_metrics <- model_metrics_df

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_t2d_control <- outcome

#### HT YK

In [None]:
# ICC per equivalence criterion: ALL individuals restricted to ht_yk_case.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- ht_yk_case
original_df <- df_ntile_norm_wide
model_metrics <- model_metrics_df

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_ht_yk_case <- outcome

In [None]:
# ICC per equivalence criterion: ALL individuals restricted to ht_yk_control.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- ht_yk_control
original_df <- df_ntile_norm_wide
model_metrics <- model_metrics_df

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_ht_yk_control <- outcome

#### Age

In [None]:
# ICC per equivalence criterion: ALL individuals restricted to age_less_60.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- age_less_60
original_df <- df_ntile_norm_wide
model_metrics <- model_metrics_df

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_age_less_60 <- outcome

In [None]:
# ICC per equivalence criterion: ALL individuals restricted to
# age_greater_equal_60.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- age_greater_equal_60
original_df <- df_ntile_norm_wide
model_metrics <- model_metrics_df

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_age_greater_equal_60 <- outcome

## all individuals multiancestry scores

### No Stratification

In [None]:
# ICC per equivalence criterion: ALL individuals, multi-ancestry scores,
# no stratification.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

original_df <- df_ntile_norm_wide_multiancestry_score
model_metrics <- model_metrics_df_multiancestry_score

df <- original_df

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_multiancestry_score <- outcome

### Stratification by Clinical Covariates

#### T2D

In [None]:
# ICC per equivalence criterion: ALL individuals, multi-ancestry scores,
# restricted to t2d_case.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- t2d_case
original_df <- df_ntile_norm_wide_multiancestry_score
model_metrics <- model_metrics_df_multiancestry_score

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_multiancestry_score_t2d_case <- outcome

In [None]:
# ICC per equivalence criterion: ALL individuals, multi-ancestry scores,
# restricted to t2d_control.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- t2d_control
original_df <- df_ntile_norm_wide_multiancestry_score
model_metrics <- model_metrics_df_multiancestry_score

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_multiancestry_score_t2d_control <- outcome

#### HT YK

In [None]:
# ICC per equivalence criterion: ALL individuals, multi-ancestry scores,
# restricted to ht_yk_case.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- ht_yk_case
original_df <- df_ntile_norm_wide_multiancestry_score
model_metrics <- model_metrics_df_multiancestry_score

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_multiancestry_score_ht_yk_case <- outcome

In [None]:
# ICC per equivalence criterion: ALL individuals, multi-ancestry scores,
# restricted to ht_yk_control.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- ht_yk_control
original_df <- df_ntile_norm_wide_multiancestry_score
model_metrics <- model_metrics_df_multiancestry_score

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_multiancestry_score_ht_yk_control <- outcome

#### Age

In [None]:
# ICC per equivalence criterion: ALL individuals, multi-ancestry scores,
# restricted to age_less_60.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- age_less_60
original_df <- df_ntile_norm_wide_multiancestry_score
model_metrics <- model_metrics_df_multiancestry_score

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_multiancestry_score_age_less_60 <- outcome

In [None]:
# ICC per equivalence criterion: ALL individuals, multi-ancestry scores,
# restricted to age_greater_equal_60.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- age_greater_equal_60
original_df <- df_ntile_norm_wide_multiancestry_score
model_metrics <- model_metrics_df_multiancestry_score

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.ALL.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_multiancestry_score_age_greater_equal_60 <- outcome

## EUR

### No Stratification

In [None]:
# ICC per equivalence criterion: EUR individuals, no stratification.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

original_df <- df_ntile_norm_wide_eur
model_metrics <- model_metrics_df_eur

df <- original_df

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_eur <- outcome

### Stratified by Clinical Covariates

#### T2D

In [None]:
# ICC per equivalence criterion: EUR individuals restricted to t2d_case.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- t2d_case
original_df <- df_ntile_norm_wide_eur
model_metrics <- model_metrics_df_eur

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.EUR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_eur_t2d_case <- outcome

In [None]:
# ICC per equivalence criterion: EUR individuals restricted to t2d_control.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- t2d_control
original_df <- df_ntile_norm_wide_eur
model_metrics <- model_metrics_df_eur

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.EUR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_eur_t2d_control <- outcome

#### HT YK

In [None]:
# ICC per equivalence criterion: EUR individuals restricted to ht_yk_case.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- ht_yk_case
original_df <- df_ntile_norm_wide_eur
model_metrics <- model_metrics_df_eur

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.EUR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_eur_ht_yk_case <- outcome

In [None]:
# ICC per equivalence criterion: EUR individuals restricted to ht_yk_control.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- ht_yk_control
original_df <- df_ntile_norm_wide_eur
model_metrics <- model_metrics_df_eur

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.EUR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_eur_ht_yk_control <- outcome

#### Age

In [None]:
# ICC per equivalence criterion: EUR individuals restricted to age_less_60.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- age_less_60
original_df <- df_ntile_norm_wide_eur
model_metrics <- model_metrics_df_eur

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.EUR.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_eur_age_less_60 <- outcome

In [None]:
# ICC per equivalence criterion: EUR individuals restricted to
# age_greater_equal_60.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- age_greater_equal_60
original_df <- df_ntile_norm_wide_eur
model_metrics <- model_metrics_df_eur

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.EUR.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_eur_age_greater_equal_60 <- outcome

## eur individuals multiancestry scores

### no stratification

In [None]:
# ICC per equivalence criterion: EUR individuals, multi-ancestry scores,
# no stratification.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

original_df <- df_ntile_norm_wide_eur_multiancestry_score
model_metrics <- model_metrics_df_eur_multiancestry_score

df <- original_df

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_eur_multiancestry_score <- outcome

### stratified by clinical covariates

#### T2D

In [None]:
# ICC per equivalence criterion: EUR individuals, multi-ancestry scores,
# restricted to t2d_case.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- t2d_case
original_df <- df_ntile_norm_wide_eur_multiancestry_score
model_metrics <- model_metrics_df_eur_multiancestry_score

# Keep only individuals whose IID appears in the stratum.
df <- filter(original_df, IID %in% filter_df$IID)

# One ICC row per criterion, stacked into a single table.
outcome <- bind_rows(
  lapply(metrics, function(criterion) ICC_equiv(df, model_metrics, criterion))
)

head(outcome)
write.table(
  outcome,
  file = "AOU.EUR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

ICC_res_eur_multiancestry_score_t2d_case <- outcome

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- t2d_control
original_df <- df_ntile_norm_wide_eur_multiancestry_score
model_metrics <- model_metrics_df_eur_multiancestry_score

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.EUR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_eur_multiancestry_score_t2d_control <- outcome

#### HT YK

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- ht_yk_case
original_df <- df_ntile_norm_wide_eur_multiancestry_score
model_metrics <- model_metrics_df_eur_multiancestry_score

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.EUR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_eur_multiancestry_score_ht_yk_case <- outcome

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- ht_yk_control
original_df <- df_ntile_norm_wide_eur_multiancestry_score
model_metrics <- model_metrics_df_eur_multiancestry_score

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.EUR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_eur_multiancestry_score_ht_yk_control <- outcome

#### Age

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- age_less_60
original_df <- df_ntile_norm_wide_eur_multiancestry_score
model_metrics <- model_metrics_df_eur_multiancestry_score

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.EUR.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_eur_multiancestry_score_age_less_60 <- outcome

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- age_greater_equal_60
original_df <- df_ntile_norm_wide_eur_multiancestry_score
model_metrics <- model_metrics_df_eur_multiancestry_score

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.EUR.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_eur_multiancestry_score_age_greater_equal_60 <- outcome

## AFR

### No Stratification

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

original_df <- df_ntile_norm_wide_afr
model_metrics <- model_metrics_df_afr

# No stratification: every row of the input table is used.
df <- original_df

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr <- outcome

### Stratified by Clinical Covariates

#### T2D

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- t2d_case
original_df <- df_ntile_norm_wide_afr
model_metrics <- model_metrics_df_afr

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr_t2d_case <- outcome

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- t2d_control
original_df <- df_ntile_norm_wide_afr
model_metrics <- model_metrics_df_afr

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr_t2d_control <- outcome

#### HT YK

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- ht_yk_case
original_df <- df_ntile_norm_wide_afr
model_metrics <- model_metrics_df_afr

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr_ht_yk_case <- outcome

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- ht_yk_control
original_df <- df_ntile_norm_wide_afr
model_metrics <- model_metrics_df_afr

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr_ht_yk_control <- outcome

#### Age

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- age_less_60
original_df <- df_ntile_norm_wide_afr
model_metrics <- model_metrics_df_afr

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr_age_less_60 <- outcome

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- age_greater_equal_60
original_df <- df_ntile_norm_wide_afr
model_metrics <- model_metrics_df_afr

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr_age_greater_equal_60 <- outcome

## afr individuals multiancestry scores

### No Stratification

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

original_df <- df_ntile_norm_wide_afr_multiancestry_score
model_metrics <- model_metrics_df_afr_multiancestry_score

# No stratification: every row of the input table is used.
df <- original_df

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr_multiancestry_score <- outcome

### Stratified by Clinical Covariates

#### T2D

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- t2d_case
original_df <- df_ntile_norm_wide_afr_multiancestry_score
model_metrics <- model_metrics_df_afr_multiancestry_score

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr_multiancestry_score_t2d_case <- outcome

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- t2d_control
original_df <- df_ntile_norm_wide_afr_multiancestry_score
model_metrics <- model_metrics_df_afr_multiancestry_score

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr_multiancestry_score_t2d_control <- outcome

#### HT YK

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- ht_yk_case
original_df <- df_ntile_norm_wide_afr_multiancestry_score
model_metrics <- model_metrics_df_afr_multiancestry_score

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr_multiancestry_score_ht_yk_case <- outcome

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- ht_yk_control
original_df <- df_ntile_norm_wide_afr_multiancestry_score
model_metrics <- model_metrics_df_afr_multiancestry_score

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr_multiancestry_score_ht_yk_control <- outcome

#### Age

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- age_less_60
original_df <- df_ntile_norm_wide_afr_multiancestry_score
model_metrics <- model_metrics_df_afr_multiancestry_score

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr_multiancestry_score_age_less_60 <- outcome

In [None]:
# Criteria names handed to ICC_equiv(), one call per entry.
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")

filter_df <- age_greater_equal_60
original_df <- df_ntile_norm_wide_afr_multiancestry_score
model_metrics <- model_metrics_df_afr_multiancestry_score

# Keep only the rows whose IID is listed in the stratum table.
df <- original_df %>%
  filter(IID %in% filter_df$IID)

# One ICC_equiv() result per criterion, stacked row-wise.
outcome <- lapply(metrics, function(m) {
  ICC_equiv(df, model_metrics, m)
}) %>%
  bind_rows()

head(outcome)
# Tab-separated export; TRUE/FALSE spelled out because T/F can be reassigned.
write.table(outcome,
            "AOU.AFR.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.multiancestry_scores.individual_agreement.ICC.txt",
            sep = "\t",
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

# Keep a uniquely named copy; `outcome` is reassigned by other cells.
ICC_res_afr_multiancestry_score_age_greater_equal_60 <- outcome

## make linegraph

### merge

#### no stratification

In [None]:
# One row per criterion, one ICC column per result table. Each step joins
# the next table on Criteria, drops its ubound/lbound/Raters columns, and
# renames its `icc` column before the following join brings in another `icc`.
icc_merge <- ICC_res %>%
  select(Criteria, icc) %>%
  rename(`ALL_indiv.all_scores` = icc) %>%
  left_join(ICC_res_multiancestry_score, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`ALL_indiv.multiancestry_scores` = icc) %>%
  left_join(ICC_res_eur, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`EUR_indiv.all_scores` = icc) %>%
  left_join(ICC_res_eur_multiancestry_score, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`EUR_indiv.multiancestry_scores` = icc) %>%
  left_join(ICC_res_afr, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`AFR_indiv.all_scores` = icc) %>%
  left_join(ICC_res_afr_multiancestry_score, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`AFR_indiv.multiancestry_scores` = icc) %>%
  # Relabel the criteria; the substitutions run in the order written.
  mutate(
    Criteria = gsub("prob_dif", "Statistically_Equivalent", Criteria),
    Criteria = gsub("ROPE_005", "ROPE_0.005", Criteria),
    Criteria = gsub("ROPE_01", "ROPE_0.01", Criteria),
    Criteria = gsub("ROPE_02", "ROPE_0.02", Criteria)
  )
icc_merge

#### stratified by clinical covariates

##### all

In [None]:
# One row per criterion, one ICC column per stratum. Each step joins the
# next table on Criteria, drops its ubound/lbound/Raters columns, and
# renames its `icc` column before the following join brings in another `icc`.
icc_merge_clinical <- ICC_res %>%
  select(Criteria, icc) %>%
  rename(`No Stratification` = icc) %>%
  left_join(ICC_res_t2d_case, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`T2D cases` = icc) %>%
  left_join(ICC_res_t2d_control, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`T2D controls` = icc) %>%
  left_join(ICC_res_ht_yk_case, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`HT case` = icc) %>%
  left_join(ICC_res_ht_yk_control, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`HT control` = icc) %>%
  left_join(ICC_res_age_less_60, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`Age < 60` = icc) %>%
  left_join(ICC_res_age_greater_equal_60, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`Age >= 60` = icc) %>%
  # Relabel the criteria; the substitutions run in the order written.
  mutate(
    Criteria = gsub("prob_dif", "Statistically_Equivalent", Criteria),
    Criteria = gsub("ROPE_005", "ROPE_0.005", Criteria),
    Criteria = gsub("ROPE_01", "ROPE_0.01", Criteria),
    Criteria = gsub("ROPE_02", "ROPE_0.02", Criteria)
  )
icc_merge_clinical

##### all individuals multiancestry scores

In [None]:
# One row per criterion, one ICC column per stratum. Each step joins the
# next table on Criteria, drops its ubound/lbound/Raters columns, and
# renames its `icc` column before the following join brings in another `icc`.
icc_merge_multiancestry_score_clinical <- ICC_res_multiancestry_score %>%
  select(Criteria, icc) %>%
  rename(`No Stratification` = icc) %>%
  left_join(ICC_res_multiancestry_score_t2d_case, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`T2D cases` = icc) %>%
  left_join(ICC_res_multiancestry_score_t2d_control, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`T2D controls` = icc) %>%
  left_join(ICC_res_multiancestry_score_ht_yk_case, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`HT case` = icc) %>%
  left_join(ICC_res_multiancestry_score_ht_yk_control, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`HT control` = icc) %>%
  left_join(ICC_res_multiancestry_score_age_less_60, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`Age < 60` = icc) %>%
  left_join(ICC_res_multiancestry_score_age_greater_equal_60, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`Age >= 60` = icc) %>%
  # Relabel the criteria; the substitutions run in the order written.
  mutate(
    Criteria = gsub("prob_dif", "Statistically_Equivalent", Criteria),
    Criteria = gsub("ROPE_005", "ROPE_0.005", Criteria),
    Criteria = gsub("ROPE_01", "ROPE_0.01", Criteria),
    Criteria = gsub("ROPE_02", "ROPE_0.02", Criteria)
  )
icc_merge_multiancestry_score_clinical

##### eur

In [None]:
# One row per criterion, one ICC column per stratum. Each step joins the
# next table on Criteria, drops its ubound/lbound/Raters columns, and
# renames its `icc` column before the following join brings in another `icc`.
icc_merge_eur_clinical <- ICC_res_eur %>%
  select(Criteria, icc) %>%
  rename(`No Stratification` = icc) %>%
  left_join(ICC_res_eur_t2d_case, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`T2D cases` = icc) %>%
  left_join(ICC_res_eur_t2d_control, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`T2D controls` = icc) %>%
  left_join(ICC_res_eur_ht_yk_case, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`HT case` = icc) %>%
  left_join(ICC_res_eur_ht_yk_control, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`HT control` = icc) %>%
  left_join(ICC_res_eur_age_less_60, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`Age < 60` = icc) %>%
  left_join(ICC_res_eur_age_greater_equal_60, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`Age >= 60` = icc) %>%
  # Relabel the criteria; the substitutions run in the order written.
  mutate(
    Criteria = gsub("prob_dif", "Statistically_Equivalent", Criteria),
    Criteria = gsub("ROPE_005", "ROPE_0.005", Criteria),
    Criteria = gsub("ROPE_01", "ROPE_0.01", Criteria),
    Criteria = gsub("ROPE_02", "ROPE_0.02", Criteria)
  )
icc_merge_eur_clinical

##### eur individuals multiancestry scores

In [None]:
# One row per criterion, one ICC column per stratum. Each step joins the
# next table on Criteria, drops its ubound/lbound/Raters columns, and
# renames its `icc` column before the following join brings in another `icc`.
icc_merge_eur_multiancestry_score_clinical <- ICC_res_eur_multiancestry_score %>%
  select(Criteria, icc) %>%
  rename(`No Stratification` = icc) %>%
  left_join(ICC_res_eur_multiancestry_score_t2d_case, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`T2D cases` = icc) %>%
  left_join(ICC_res_eur_multiancestry_score_t2d_control, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`T2D controls` = icc) %>%
  left_join(ICC_res_eur_multiancestry_score_ht_yk_case, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`HT case` = icc) %>%
  left_join(ICC_res_eur_multiancestry_score_ht_yk_control, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`HT control` = icc) %>%
  left_join(ICC_res_eur_multiancestry_score_age_less_60, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`Age < 60` = icc) %>%
  left_join(ICC_res_eur_multiancestry_score_age_greater_equal_60, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`Age >= 60` = icc) %>%
  # Relabel the criteria; the substitutions run in the order written.
  mutate(
    Criteria = gsub("prob_dif", "Statistically_Equivalent", Criteria),
    Criteria = gsub("ROPE_005", "ROPE_0.005", Criteria),
    Criteria = gsub("ROPE_01", "ROPE_0.01", Criteria),
    Criteria = gsub("ROPE_02", "ROPE_0.02", Criteria)
  )
icc_merge_eur_multiancestry_score_clinical

##### afr

In [None]:
# One row per criterion, one ICC column per stratum. Each step joins the
# next table on Criteria, drops its ubound/lbound/Raters columns, and
# renames its `icc` column before the following join brings in another `icc`.
icc_merge_afr_clinical <- ICC_res_afr %>%
  select(Criteria, icc) %>%
  rename(`No Stratification` = icc) %>%
  left_join(ICC_res_afr_t2d_case, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`T2D cases` = icc) %>%
  left_join(ICC_res_afr_t2d_control, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`T2D controls` = icc) %>%
  left_join(ICC_res_afr_ht_yk_case, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`HT case` = icc) %>%
  left_join(ICC_res_afr_ht_yk_control, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`HT control` = icc) %>%
  left_join(ICC_res_afr_age_less_60, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`Age < 60` = icc) %>%
  left_join(ICC_res_afr_age_greater_equal_60, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`Age >= 60` = icc) %>%
  # Relabel the criteria; the substitutions run in the order written.
  mutate(
    Criteria = gsub("prob_dif", "Statistically_Equivalent", Criteria),
    Criteria = gsub("ROPE_005", "ROPE_0.005", Criteria),
    Criteria = gsub("ROPE_01", "ROPE_0.01", Criteria),
    Criteria = gsub("ROPE_02", "ROPE_0.02", Criteria)
  )
icc_merge_afr_clinical

##### afr individuals multiancestry scores

In [None]:
# One row per criterion, one ICC column per stratum. Each step joins the
# next table on Criteria, drops its ubound/lbound/Raters columns, and
# renames its `icc` column before the following join brings in another `icc`.
icc_merge_afr_multiancestry_score_clinical <- ICC_res_afr_multiancestry_score %>%
  select(Criteria, icc) %>%
  rename(`No Stratification` = icc) %>%
  left_join(ICC_res_afr_multiancestry_score_t2d_case, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`T2D cases` = icc) %>%
  left_join(ICC_res_afr_multiancestry_score_t2d_control, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`T2D controls` = icc) %>%
  left_join(ICC_res_afr_multiancestry_score_ht_yk_case, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`HT case` = icc) %>%
  left_join(ICC_res_afr_multiancestry_score_ht_yk_control, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`HT control` = icc) %>%
  left_join(ICC_res_afr_multiancestry_score_age_less_60, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`Age < 60` = icc) %>%
  left_join(ICC_res_afr_multiancestry_score_age_greater_equal_60, by = "Criteria") %>%
  select(-c(ubound, lbound, Raters)) %>%
  rename(`Age >= 60` = icc) %>%
  # Relabel the criteria; the substitutions run in the order written.
  mutate(
    Criteria = gsub("prob_dif", "Statistically_Equivalent", Criteria),
    Criteria = gsub("ROPE_005", "ROPE_0.005", Criteria),
    Criteria = gsub("ROPE_01", "ROPE_0.01", Criteria),
    Criteria = gsub("ROPE_02", "ROPE_0.02", Criteria)
  )
icc_merge_afr_multiancestry_score_clinical

### convert to long form

#### no stratification

In [None]:
# Long form: one row per (Criteria, score_type) pair, built from every
# column whose name ends in "_scores".
icc_merge_long <- icc_merge %>%
  pivot_longer(
    cols = ends_with("_scores"),
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  # Fix the factor level order of the criteria.
  mutate(
    Criteria = factor(
      Criteria,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    )
  )
icc_merge_long

#### stratified by clinical covariates

In [None]:
# Long form: one row per (Criteria, score_type) pair, built from every
# column except Criteria.
icc_merge_clinical_long <- icc_merge_clinical %>%
  pivot_longer(
    cols = -Criteria,
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  # Fix the factor level order of the criteria.
  mutate(
    Criteria = factor(
      Criteria,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    )
  )
dim(icc_merge_clinical_long)

In [None]:
# Long form: one row per (Criteria, score_type) pair, built from every
# column except Criteria.
icc_merge_multiancestry_score_clinical_long <- icc_merge_multiancestry_score_clinical %>%
  pivot_longer(
    cols = -Criteria,
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  # Fix the factor level order of the criteria.
  mutate(
    Criteria = factor(
      Criteria,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    )
  )
dim(icc_merge_multiancestry_score_clinical_long)

In [None]:
# Long form: one row per (Criteria, score_type) pair, built from every
# column except Criteria.
icc_merge_eur_clinical_long <- icc_merge_eur_clinical %>%
  pivot_longer(
    cols = -Criteria,
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  # Fix the factor level order of the criteria.
  mutate(
    Criteria = factor(
      Criteria,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    )
  )
dim(icc_merge_eur_clinical_long)

In [None]:
# Long form: one row per (Criteria, score_type) pair, built from every
# column except Criteria.
icc_merge_eur_multiancestry_score_clinical_long <- icc_merge_eur_multiancestry_score_clinical %>%
  pivot_longer(
    cols = -Criteria,
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  # Fix the factor level order of the criteria.
  mutate(
    Criteria = factor(
      Criteria,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    )
  )
dim(icc_merge_eur_multiancestry_score_clinical_long)

In [None]:
# Long form: one row per (Criteria, score_type) pair, built from every
# column except Criteria.
icc_merge_afr_clinical_long <- icc_merge_afr_clinical %>%
  pivot_longer(
    cols = -Criteria,
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  # Fix the factor level order of the criteria.
  mutate(
    Criteria = factor(
      Criteria,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    )
  )
dim(icc_merge_afr_clinical_long)

In [None]:
# Long form: one row per (Criteria, score_type) pair, built from every
# column except Criteria.
icc_merge_afr_multiancestry_score_clinical_long <- icc_merge_afr_multiancestry_score_clinical %>%
  pivot_longer(
    cols = -Criteria,
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  # Fix the factor level order of the criteria.
  mutate(
    Criteria = factor(
      Criteria,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    )
  )
dim(icc_merge_afr_multiancestry_score_clinical_long)

### make linegraph

#### no stratification

In [None]:
# Line graph of score_value against Criteria, one coloured line per score_type.
icc_plot <- ggplot(
  icc_merge_long,
  aes(x = Criteria, y = score_value, color = score_type, group = score_type)
) +
  geom_point() +
  geom_line() +
  labs(
    title = "AOU CKD PGS ICC",
    x = "Equivalence Criteria",
    y = "ICC",
    color = "Score"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )

icc_plot
# Name the plot explicitly instead of relying on ggplot2's last_plot() state.
ggsave("AOU.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.ICC.line_graph.png",
       plot = icc_plot,
       width = 10,
       height = 6,
       dpi = 300,
       bg = "white")

#### stratified by clinical covariates

##### all

In [None]:
# Line graph of score_value against Criteria, one coloured line per score_type.
icc_clinical_plot <- ggplot(
  icc_merge_clinical_long,
  aes(x = Criteria, y = score_value, color = score_type, group = score_type)
) +
  geom_point() +
  geom_line() +
  labs(
    title = "AOU ALL CKD PGS ICC",
    subtitle = "Stratified by Clinical Covariates",
    x = "Equivalence Criteria",
    y = "ICC",
    color = "Stratification"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )

icc_clinical_plot
# Name the plot explicitly instead of relying on ggplot2's last_plot() state.
ggsave("AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.clinical_covariate_stratification.T2D.HT_YK.AGE.individual_agreement.ICC.line_graph.png",
       plot = icc_clinical_plot,
       width = 12,
       height = 6,
       dpi = 300,
       bg = "white")

##### all individuals multiancestry scores

In [None]:
# Line graph of score_value against Criteria, one coloured line per score_type.
icc_clinical_plot <- ggplot(
  icc_merge_multiancestry_score_clinical_long,
  aes(x = Criteria, y = score_value, color = score_type, group = score_type)
) +
  geom_point() +
  geom_line() +
  labs(
    title = "AOU ALL CKD PGS ICC",
    subtitle = "EUR/Multiancestry Scores Only\nStratified by Clinical Covariates",
    x = "Equivalence Criteria",
    y = "ICC",
    color = "Stratification"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )

icc_clinical_plot
# Name the plot explicitly instead of relying on ggplot2's last_plot() state.
ggsave("AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.clinical_covariate_stratification.T2D.HT_YK.AGE.individual_agreement.ICC.line_graph.png",
       plot = icc_clinical_plot,
       width = 12,
       height = 6,
       dpi = 300,
       bg = "white")

##### eur

In [None]:
# Line graph of score_value against Criteria, one coloured line per score_type.
icc_clinical_plot <- ggplot(
  icc_merge_eur_clinical_long,
  aes(x = Criteria, y = score_value, color = score_type, group = score_type)
) +
  geom_point() +
  geom_line() +
  labs(
    title = "AOU EUR CKD PGS ICC",
    subtitle = "Stratified by Clinical Covariates",
    x = "Equivalence Criteria",
    y = "ICC",
    color = "Stratification"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )

icc_clinical_plot
# Name the plot explicitly instead of relying on ggplot2's last_plot() state.
ggsave("AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.clinical_covariate_stratification.T2D.HT_YK.AGE.individual_agreement.ICC.line_graph.png",
       plot = icc_clinical_plot,
       width = 12,
       height = 6,
       dpi = 300,
       bg = "white")

##### eur individuals multiancestry scores

In [None]:
# Line graph of score_value against Criteria, one coloured line per score_type.
icc_clinical_plot <- ggplot(
  icc_merge_eur_multiancestry_score_clinical_long,
  aes(x = Criteria, y = score_value, color = score_type, group = score_type)
) +
  geom_point() +
  geom_line() +
  labs(
    title = "AOU EUR CKD PGS ICC",
    subtitle = "EUR/Multiancestry Scores Only\nStratified by Clinical Covariates",
    x = "Equivalence Criteria",
    y = "ICC",
    color = "Stratification"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )

icc_clinical_plot
# Name the plot explicitly instead of relying on ggplot2's last_plot() state.
ggsave("AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.clinical_covariate_stratification.T2D.HT_YK.AGE.individual_agreement.ICC.line_graph.png",
       plot = icc_clinical_plot,
       width = 12,
       height = 6,
       dpi = 300,
       bg = "white")

##### afr

In [None]:
# Line graph of score_value against Criteria, one coloured line per score_type.
icc_clinical_plot <- ggplot(
  icc_merge_afr_clinical_long,
  aes(x = Criteria, y = score_value, color = score_type, group = score_type)
) +
  geom_point() +
  geom_line() +
  labs(
    title = "AOU AFR CKD PGS ICC",
    subtitle = "Stratified by Clinical Covariates",
    x = "Equivalence Criteria",
    y = "ICC",
    color = "Stratification"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )

icc_clinical_plot
# Name the plot explicitly instead of relying on ggplot2's last_plot() state.
ggsave("AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.clinical_covariate_stratification.T2D.HT_YK.AGE.individual_agreement.ICC.line_graph.png",
       plot = icc_clinical_plot,
       width = 12,
       height = 6,
       dpi = 300,
       bg = "white")

##### afr individuals multiancestry scores

In [None]:
# Line graph of score_value against Criteria, one coloured line per score_type.
icc_clinical_plot <- ggplot(
  icc_merge_afr_multiancestry_score_clinical_long,
  aes(x = Criteria, y = score_value, color = score_type, group = score_type)
) +
  geom_point() +
  geom_line() +
  labs(
    title = "AOU AFR CKD PGS ICC",
    subtitle = "EUR/Multiancestry Scores Only\nStratified by Clinical Covariates",
    x = "Equivalence Criteria",
    y = "ICC",
    color = "Stratification"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )

icc_clinical_plot
# Name the plot explicitly instead of relying on ggplot2's last_plot() state.
ggsave("AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.clinical_covariate_stratification.T2D.HT_YK.AGE.individual_agreement.ICC.line_graph.png",
       plot = icc_clinical_plot,
       width = 12,
       height = 6,
       dpi = 300,
       bg = "white")

# compute light's kappa

## make equiv scores function (if needed)

In [None]:
#' Select the models whose rows all satisfy a criterion column
#'
#' @param df Data frame with a `model` column and a column named by
#'   `criteria`.
#' @param criteria Name (string) of the column to test. A model is kept only
#'   when `all(<criteria> == 1)` is TRUE over its rows; a group for which that
#'   test is NA is dropped, because `filter()` discards NA conditions.
#' @param ntile If TRUE, return the model names prefixed with "ntile_";
#'   otherwise return the model values as pulled from `df`.
#' @return The unique qualifying `model` values in order of first appearance
#'   (no sorting is applied), or the corresponding "ntile_<model>" character
#'   vector when `ntile = TRUE`. Zero-length when no model qualifies.
equiv_scores <- function(df, criteria, ntile = FALSE) {
  if (!criteria %in% names(df)) {
    stop(paste0("Criteria column ", criteria, " not found in dataframe"))
  }

  # Keep whole model groups in which every row meets the criterion.
  models <- df %>%
    group_by(model) %>%
    filter(all(.data[[criteria]] == 1)) %>%
    ungroup() %>%
    pull(model) %>%
    unique()

  if (!ntile) {
    return(models)
  }

  # paste()/paste0() turn a zero-length argument into "", which would yield
  # the bogus name "ntile_"; return an empty vector instead.
  if (length(models) == 0) {
    return(character(0))
  }

  paste0("ntile_", models)
}

## read in input files (if needed)

### all

In [None]:
# Reload the tab-separated wide-form percentile table and preview it.
df_ntile_norm_wide <- fread(
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt",
  sep = "\t"
)
head(df_ntile_norm_wide)

In [None]:
# Reload the tab-separated model performance metrics and preview them.
# read.delim() is read.csv() with sep = "\t" as its default.
model_metrics_df <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt"
)
head(model_metrics_df)

### all individuals multiancestry scores

In [None]:
# Reload the tab-separated wide-form percentile table and preview it.
df_ntile_norm_wide_multiancestry_score <- fread(
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt",
  sep = "\t"
)
head(df_ntile_norm_wide_multiancestry_score)

In [None]:
# Reload the tab-separated model performance metrics and preview them.
# read.delim() is read.csv() with sep = "\t" as its default.
model_metrics_df_multiancestry_score <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt"
)
head(model_metrics_df_multiancestry_score)

### eur

In [None]:
# Reload the tab-separated wide-form percentile table and preview it.
df_ntile_norm_wide_eur <- fread(
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt",
  sep = "\t"
)
head(df_ntile_norm_wide_eur)

In [None]:
# Reload the tab-separated model performance metrics and preview them.
# read.delim() is read.csv() with sep = "\t" as its default.
model_metrics_df_eur <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt"
)
head(model_metrics_df_eur)

### eur individuals multiancestry scores

In [None]:
# Reload the tab-separated wide-form percentile table and preview it.
df_ntile_norm_wide_eur_multiancestry_score <- fread(
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt",
  sep = "\t"
)
head(df_ntile_norm_wide_eur_multiancestry_score)

In [None]:
# Reload the tab-separated model performance metrics and preview them.
# read.delim() is read.csv() with sep = "\t" as its default.
model_metrics_df_eur_multiancestry_score <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt"
)
head(model_metrics_df_eur_multiancestry_score)

### afr

In [None]:
# Wide-form individual percentile table, AFR file.
df_ntile_norm_wide_afr <- fread(
    'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt',
    sep = '\t'
)
head(df_ntile_norm_wide_afr)

In [None]:
# Model performance metrics, AFR file.
# read.delim() is read.csv() with sep = "\t" as the default; all other defaults match.
model_metrics_df_afr <- read.delim(
    'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)
head(model_metrics_df_afr)

### afr individuals multiancestry scores

In [None]:
# Wide-form individual percentile table, AFR, multiancestry scores file.
df_ntile_norm_wide_afr_multiancestry_score <- fread(
    'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_percentile.wide_form.txt',
    sep = '\t'
)
head(df_ntile_norm_wide_afr_multiancestry_score)

In [None]:
# Model performance metrics, AFR, multiancestry scores file.
# read.delim() is read.csv() with sep = "\t" as the default; all other defaults match.
model_metrics_df_afr_multiancestry_score <- read.delim(
    'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.cv_glm.perf_model.performance_metrics.txt'
)
head(model_metrics_df_afr_multiancestry_score)

## write functions

In [None]:
# Light's kappa for m raters: the mean of the unweighted Cohen's kappa
# (kappa2) over every pair of rater columns.
#
# ratings: subjects-by-raters table; rows containing NA are dropped first.
# Returns an "irrlist" holding the mean kappa in $value together with the
# z statistic and two-sided p-value derived from the chance term below.
#
# All intermediate vectors are built locally. The per-level counts are
# converted to double before being multiplied, because sum() over a logical
# vector is an integer and an integer product turns into NA on overflow.
kappam.light <- function(ratings) {
    ratings <- as.matrix(na.omit(ratings))

    ns <- nrow(ratings)
    nr <- ncol(ratings)
    if (nr < 2) {
        stop("kappam.light needs at least two rater columns", call. = FALSE)
    }

    # Rater pairs in the order (1,2), (1,3), ..., (nr-1,nr).
    rater_pairs <- utils::combn(nr, 2, simplify = FALSE)

    kappas <- vapply(rater_pairs, function(pair) {
        kappa2(ratings[, pair], weight = "u")$value
    }, numeric(1))
    value <- mean(kappas)

    # Variance & computation of p-value
    lev <- levels(as.factor(ratings))

    # Number of subjects a given rater assigned to each level, as doubles.
    level_counts <- function(rater) {
        vapply(lev, function(l) as.numeric(sum(ratings[, rater] == l)), numeric(1))
    }

    # Per pair: sum over level combinations i != j of
    # count_i(first rater) * count_j(second rater).
    disrater <- vapply(rater_pairs, function(pair) {
        products <- outer(level_counts(pair[1]), level_counts(pair[2]))
        sum(products[row(products) != col(products)])
    }, numeric(1))

    B <- length(disrater) * prod(disrater)
    chanceP  <- 1 - B / ns^(choose(nr, 2) * 2)
    varkappa <- chanceP / (ns * (1 - chanceP))

    SEkappa <- sqrt(varkappa)
    u <- value / SEkappa
    p.value <- 2 * (1 - pnorm(abs(u)))

    structure(list(method = "Light's Kappa for m Raters",
                   subjects = ns, raters = nr,
                   irr.name = "Kappa", value = value,
                   stat.name = "z", statistic = u, p.value = p.value),
              class = "irrlist")
}

In [None]:
# Reshape the selected percentile columns to long form and flag, for every
# cutoff, whether each individual's percentile reaches it.
#
# df:         wide table with an IID column plus the columns in ntile_list.
# ntile_list: names of the percentile columns to keep.
# percentile: vector of cutoffs; each adds a 1/0 column named ntile_<cutoff>.
# Returns the long table (IID, score, ntile, ntile_<cutoff>...).
create_binary_df <- function(df, ntile_list, percentile) {

    # One row per (IID, score) pair, with the percentile in `ntile`.
    long_scores <- pivot_longer(
        select(df, IID, all_of(ntile_list)),
        cols = starts_with("ntile"),
        names_to = "score",
        values_to = "ntile"
    )

    for (cutoff in percentile) {
        flag_name <- paste0("ntile_", cutoff)
        long_scores <- mutate(long_scores,
                              !!flag_name := ifelse(ntile >= cutoff, 1.0, 0.0))
    }

    long_scores
}

In [None]:
# Light's kappa across the score columns for one percentile cutoff.
#
# df_ntile_top: long table with IID, score and the ntile_<percentile> flag column.
# percentile:   cutoff whose flag column is compared across scores.
# ntile_list:   score names to keep as rater columns after widening.
# metric_name:  label copied into the `metric` column of the result.
# Returns a one-row data.frame (metric, percentile, kappa_value).
calculate_and_kappa <- function(df_ntile_top, percentile, ntile_list, metric_name) {
    flag_col <- paste0("ntile_", percentile)

    # One row per IID, one column per score holding that score's flag.
    wide_flags <- pivot_wider(
        dplyr::select(df_ntile_top, score, !!flag_col, IID),
        names_from = score,
        values_from = !!flag_col
    )
    rater_table <- dplyr::select(wide_flags, all_of(ntile_list))

    light <- kappam.light(rater_table)

    data.frame(metric = metric_name,
               percentile = percentile,
               kappa_value = as.numeric(light$value))
}

## compute

### all

#### no stratification

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals, no stratification.
model_metrics <- model_metrics_df
ntile <- df_ntile_norm_wide
metrics <- c("ROPE_005", "ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results <- outcome

#### stratified by clinical covariates

##### T2D

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals, IIDs in t2d_case.
model_metrics <- model_metrics_df
metrics <- c("ROPE_005", "ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- t2d_case
ntile <- df_ntile_norm_wide %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.ALL.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_t2d_case <- outcome

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals, IIDs in t2d_control.
model_metrics <- model_metrics_df
metrics <- c("ROPE_005", "ROPE_01", "ROPE_02")
# The 99 cutoff is included so this table covers the same cutoffs as the
# T2D-case table and every other stratum.
# NOTE(review): it was missing here only; confirm the omission was not deliberate.
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- t2d_control
ntile <- df_ntile_norm_wide %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
    df_ntile_top <- create_binary_df(ntile, list_data, percentiles)

    results <- lapply(percentiles, function(p) {
        calculate_and_kappa(df_ntile_top, p, list_data, metric)
    })

    do.call(rbind, results)
}

kappa_results <- lapply(metrics, process_list)

outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            'AOU.ALL.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

combined_kappa_results_t2d_control <- outcome

##### HT YK

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals, IIDs in ht_yk_case.
model_metrics <- model_metrics_df
metrics <- c("ROPE_005", "ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- ht_yk_case
ntile <- df_ntile_norm_wide %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.ALL.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_ht_yk_case <- outcome

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals, IIDs in ht_yk_control.
model_metrics <- model_metrics_df
metrics <- c("ROPE_005", "ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- ht_yk_control
ntile <- df_ntile_norm_wide %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.ALL.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_ht_yk_control <- outcome

##### Age

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals, IIDs in age_less_60.
model_metrics <- model_metrics_df
metrics <- c("ROPE_005", "ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- age_less_60
ntile <- df_ntile_norm_wide %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.ALL.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_age_less_60 <- outcome

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals, IIDs in age_greater_equal_60.
model_metrics <- model_metrics_df
metrics <- c("ROPE_005", "ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- age_greater_equal_60
ntile <- df_ntile_norm_wide %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.ALL.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_age_greater_equal_60 <- outcome

### all individuals multiancestry scores

#### no stratification

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals,
# multiancestry scores tables, no stratification.
model_metrics <- model_metrics_df_multiancestry_score
ntile <- df_ntile_norm_wide_multiancestry_score
metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_multiancestry_score <- outcome

#### stratified by clinical covariates

##### T2D

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals,
# multiancestry scores tables, IIDs in t2d_case.
model_metrics <- model_metrics_df_multiancestry_score
metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- t2d_case
ntile <- df_ntile_norm_wide_multiancestry_score %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.ALL.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_multiancestry_score_t2d_case <- outcome

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals,
# multiancestry scores tables, IIDs in t2d_control.
model_metrics <- model_metrics_df_multiancestry_score
metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- t2d_control
ntile <- df_ntile_norm_wide_multiancestry_score %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.ALL.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_multiancestry_score_t2d_control <- outcome

##### HT YK

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals,
# multiancestry scores tables, IIDs in ht_yk_case.
model_metrics <- model_metrics_df_multiancestry_score
metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- ht_yk_case
ntile <- df_ntile_norm_wide_multiancestry_score %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.ALL.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_multiancestry_score_ht_yk_case <- outcome

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals,
# multiancestry scores tables, IIDs in ht_yk_control.
model_metrics <- model_metrics_df_multiancestry_score
metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- ht_yk_control
ntile <- df_ntile_norm_wide_multiancestry_score %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.ALL.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_multiancestry_score_ht_yk_control <- outcome

##### Age

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals,
# multiancestry scores tables, IIDs in age_less_60.
model_metrics <- model_metrics_df_multiancestry_score
metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- age_less_60
ntile <- df_ntile_norm_wide_multiancestry_score %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.ALL.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_multiancestry_score_age_less_60 <- outcome

In [None]:
# Light's kappa per metric and percentile cutoff: all individuals,
# multiancestry scores tables, IIDs in age_greater_equal_60.
model_metrics <- model_metrics_df_multiancestry_score
metrics <- c("ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- age_greater_equal_60
ntile <- df_ntile_norm_wide_multiancestry_score %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.ALL.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_multiancestry_score_age_greater_equal_60 <- outcome

### eur

#### no stratification

In [None]:
# Light's kappa per metric and percentile cutoff: EUR tables, no stratification.
model_metrics <- model_metrics_df_eur
ntile <- df_ntile_norm_wide_eur
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_eur <- outcome

#### stratified by clinical covariates

##### T2D

In [None]:
# Light's kappa per metric and percentile cutoff: EUR tables, IIDs in t2d_case.
model_metrics <- model_metrics_df_eur
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- t2d_case
ntile <- df_ntile_norm_wide_eur %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.EUR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_eur_t2d_case <- outcome

In [None]:
# Light's kappa per metric and percentile cutoff: EUR tables, IIDs in t2d_control.
model_metrics <- model_metrics_df_eur
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- t2d_control
ntile <- df_ntile_norm_wide_eur %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.EUR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_eur_t2d_control <- outcome

##### HT YK

In [None]:
# Light's kappa per metric and percentile cutoff: EUR tables, IIDs in ht_yk_case.
model_metrics <- model_metrics_df_eur
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- ht_yk_case
ntile <- df_ntile_norm_wide_eur %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.EUR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_eur_ht_yk_case <- outcome

In [None]:
# Light's kappa per metric and percentile cutoff: EUR tables, IIDs in ht_yk_control.
model_metrics <- model_metrics_df_eur
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- ht_yk_control
ntile <- df_ntile_norm_wide_eur %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.EUR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_eur_ht_yk_control <- outcome

##### Age

In [None]:
# Light's kappa per metric and percentile cutoff: EUR tables, IIDs in age_less_60.
model_metrics <- model_metrics_df_eur
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- age_less_60
ntile <- df_ntile_norm_wide_eur %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.EUR.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_eur_age_less_60 <- outcome

In [None]:
# Light's kappa per metric and percentile cutoff: EUR tables, IIDs in age_greater_equal_60.
model_metrics <- model_metrics_df_eur
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- age_greater_equal_60
ntile <- df_ntile_norm_wide_eur %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.EUR.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_eur_age_greater_equal_60 <- outcome

### eur individuals multiancestry scores

#### no stratification

In [None]:
# Light's kappa per metric and percentile cutoff: EUR multiancestry scores
# tables, no stratification.
model_metrics <- model_metrics_df_eur_multiancestry_score
ntile <- df_ntile_norm_wide_eur_multiancestry_score
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_eur_multiancestry_score <- outcome

#### stratified by clinical covariates

##### T2D

In [None]:
# Light's kappa per metric and percentile cutoff: EUR multiancestry scores
# tables, IIDs in t2d_case.
model_metrics <- model_metrics_df_eur_multiancestry_score
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- t2d_case
ntile <- df_ntile_norm_wide_eur_multiancestry_score %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.EUR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_eur_multiancestry_score_t2d_case <- outcome

In [None]:
# Light's kappa per metric and percentile cutoff: EUR multiancestry scores
# tables, IIDs in t2d_control.
model_metrics <- model_metrics_df_eur_multiancestry_score
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- t2d_control
ntile <- df_ntile_norm_wide_eur_multiancestry_score %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
    df_ntile_top <- create_binary_df(ntile, list_data, percentiles)

    results <- lapply(percentiles, function(p) {
        calculate_and_kappa(df_ntile_top, p, list_data, metric)
    })

    do.call(rbind, results)
}

# The result must be stored in kappa_results: the previous misspelled target
# left `outcome` built from the kappa_results of the preceding cell, so this
# stratum's file and table carried another stratum's values.
kappa_results <- lapply(metrics, process_list)

outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            'AOU.EUR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)

combined_kappa_results_eur_multiancestry_score_t2d_control <- outcome

##### HT YK

In [None]:
# Light's kappa per metric and percentile cutoff: EUR multiancestry scores
# tables, IIDs in ht_yk_case.
model_metrics <- model_metrics_df_eur_multiancestry_score
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- ht_yk_case
ntile <- df_ntile_norm_wide_eur_multiancestry_score %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.EUR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_eur_multiancestry_score_ht_yk_case <- outcome

In [None]:
# Light's kappa per metric and percentile cutoff: EUR multiancestry scores
# tables, IIDs in ht_yk_control.
model_metrics <- model_metrics_df_eur_multiancestry_score
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- ht_yk_control
ntile <- df_ntile_norm_wide_eur_multiancestry_score %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.EUR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_eur_multiancestry_score_ht_yk_control <- outcome

##### Age

In [None]:
# Light's kappa per metric and percentile cutoff: EUR multiancestry scores
# tables, IIDs in age_less_60.
model_metrics <- model_metrics_df_eur_multiancestry_score
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- age_less_60
ntile <- df_ntile_norm_wide_eur_multiancestry_score %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.EUR.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_eur_multiancestry_score_age_less_60 <- outcome

In [None]:
# Light's kappa per metric and percentile cutoff: EUR multiancestry scores
# tables, IIDs in age_greater_equal_60.
model_metrics <- model_metrics_df_eur_multiancestry_score
metrics <- c("ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)
filter_df <- age_greater_equal_60
ntile <- df_ntile_norm_wide_eur_multiancestry_score %>%
    filter(IID %in% filter_df$IID)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.EUR.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_eur_multiancestry_score_age_greater_equal_60 <- outcome

### afr

#### no stratification

In [None]:
# Light's kappa per metric and percentile cutoff: AFR tables, no stratification.
model_metrics <- model_metrics_df_afr
ntile <- df_ntile_norm_wide_afr
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# Uses the globals model_metrics, ntile and percentiles assigned above.
process_list <- function(metric) {
    score_cols <- equiv_scores(model_metrics, metric, ntile = TRUE)
    top_flags <- create_binary_df(ntile, score_cols, percentiles)
    per_cutoff <- lapply(percentiles, function(cutoff) {
        calculate_and_kappa(top_flags, cutoff, score_cols, metric)
    })
    do.call(rbind, per_cutoff)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)
write.table(outcome,
            file = 'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

combined_kappa_results_afr <- outcome

#### stratified by clinical covariates

##### T2D

In [None]:
# Kappa agreement table: AFR individuals, T2D cases stratum (t2d_case).
# All objects are assigned at top level on purpose: process_list() looks up
# model_metrics, ntile and percentiles in the global environment when called.
model_metrics <- model_metrics_df_afr
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
filter_df <- t2d_case
# Keep only the individuals whose IID appears in the stratum table.
ntile <- df_ntile_norm_wide_afr %>%
  filter(IID %in% filter_df$IID)
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# For one metric: run equiv_scores() and create_binary_df() once, then call
# calculate_and_kappa() for every percentile and row-bind the results.
process_list <- function(metric) {
  list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
  df_ntile_top <- create_binary_df(ntile, list_data, percentiles)
  per_percentile <- lapply(percentiles, function(p) {
    calculate_and_kappa(df_ntile_top, p, list_data, metric)
  })
  do.call(rbind, per_percentile)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
write.table(
  outcome,
  "AOU.AFR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.individual_agreement.LightsKappa.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

combined_kappa_results_afr_t2d_case <- outcome

In [None]:
# Kappa agreement table: AFR individuals, T2D controls stratum (t2d_control).
# All objects are assigned at top level on purpose: process_list() looks up
# model_metrics, ntile and percentiles in the global environment when called.
model_metrics <- model_metrics_df_afr
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
filter_df <- t2d_control
# Keep only the individuals whose IID appears in the stratum table.
ntile <- df_ntile_norm_wide_afr %>%
  filter(IID %in% filter_df$IID)
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# For one metric: run equiv_scores() and create_binary_df() once, then call
# calculate_and_kappa() for every percentile and row-bind the results.
process_list <- function(metric) {
  list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
  df_ntile_top <- create_binary_df(ntile, list_data, percentiles)
  per_percentile <- lapply(percentiles, function(p) {
    calculate_and_kappa(df_ntile_top, p, list_data, metric)
  })
  do.call(rbind, per_percentile)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
write.table(
  outcome,
  "AOU.AFR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.individual_agreement.LightsKappa.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

combined_kappa_results_afr_t2d_control <- outcome

##### HT YK

In [None]:
# Kappa agreement table: AFR individuals, HT YK cases stratum (ht_yk_case).
# All objects are assigned at top level on purpose: process_list() looks up
# model_metrics, ntile and percentiles in the global environment when called.
model_metrics <- model_metrics_df_afr
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
filter_df <- ht_yk_case
# Keep only the individuals whose IID appears in the stratum table.
ntile <- df_ntile_norm_wide_afr %>%
  filter(IID %in% filter_df$IID)
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# For one metric: run equiv_scores() and create_binary_df() once, then call
# calculate_and_kappa() for every percentile and row-bind the results.
process_list <- function(metric) {
  list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
  df_ntile_top <- create_binary_df(ntile, list_data, percentiles)
  per_percentile <- lapply(percentiles, function(p) {
    calculate_and_kappa(df_ntile_top, p, list_data, metric)
  })
  do.call(rbind, per_percentile)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
write.table(
  outcome,
  "AOU.AFR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.individual_agreement.LightsKappa.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

combined_kappa_results_afr_ht_yk_case <- outcome

In [None]:
# Kappa agreement table: AFR individuals, HT YK controls stratum
# (ht_yk_control).
# All objects are assigned at top level on purpose: process_list() looks up
# model_metrics, ntile and percentiles in the global environment when called.
model_metrics <- model_metrics_df_afr
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
filter_df <- ht_yk_control
# Keep only the individuals whose IID appears in the stratum table.
ntile <- df_ntile_norm_wide_afr %>%
  filter(IID %in% filter_df$IID)
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# For one metric: run equiv_scores() and create_binary_df() once, then call
# calculate_and_kappa() for every percentile and row-bind the results.
process_list <- function(metric) {
  list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
  df_ntile_top <- create_binary_df(ntile, list_data, percentiles)
  per_percentile <- lapply(percentiles, function(p) {
    calculate_and_kappa(df_ntile_top, p, list_data, metric)
  })
  do.call(rbind, per_percentile)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
write.table(
  outcome,
  "AOU.AFR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.individual_agreement.LightsKappa.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

combined_kappa_results_afr_ht_yk_control <- outcome

##### Age

In [None]:
# Kappa agreement table: AFR individuals, age < 60 stratum (age_less_60).
# All objects are assigned at top level on purpose: process_list() looks up
# model_metrics, ntile and percentiles in the global environment when called.
model_metrics <- model_metrics_df_afr
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
filter_df <- age_less_60
# Keep only the individuals whose IID appears in the stratum table.
ntile <- df_ntile_norm_wide_afr %>%
  filter(IID %in% filter_df$IID)
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# For one metric: run equiv_scores() and create_binary_df() once, then call
# calculate_and_kappa() for every percentile and row-bind the results.
process_list <- function(metric) {
  list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
  df_ntile_top <- create_binary_df(ntile, list_data, percentiles)
  per_percentile <- lapply(percentiles, function(p) {
    calculate_and_kappa(df_ntile_top, p, list_data, metric)
  })
  do.call(rbind, per_percentile)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
write.table(
  outcome,
  "AOU.AFR.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.individual_agreement.LightsKappa.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

combined_kappa_results_afr_age_less_60 <- outcome

In [None]:
# Kappa agreement table: AFR individuals, age >= 60 stratum
# (age_greater_equal_60).
# All objects are assigned at top level on purpose: process_list() looks up
# model_metrics, ntile and percentiles in the global environment when called.
model_metrics <- model_metrics_df_afr
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
filter_df <- age_greater_equal_60
# Keep only the individuals whose IID appears in the stratum table.
ntile <- df_ntile_norm_wide_afr %>%
  filter(IID %in% filter_df$IID)
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# For one metric: run equiv_scores() and create_binary_df() once, then call
# calculate_and_kappa() for every percentile and row-bind the results.
process_list <- function(metric) {
  list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
  df_ntile_top <- create_binary_df(ntile, list_data, percentiles)
  per_percentile <- lapply(percentiles, function(p) {
    calculate_and_kappa(df_ntile_top, p, list_data, metric)
  })
  do.call(rbind, per_percentile)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
write.table(
  outcome,
  "AOU.AFR.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.individual_agreement.LightsKappa.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

combined_kappa_results_afr_age_greater_equal_60 <- outcome

### afr individuals multiancestry scores

#### no stratification

In [None]:
# Kappa agreement table: AFR individuals, multiancestry scores, no
# stratification.
# All objects are assigned at top level on purpose: process_list() looks up
# model_metrics, ntile and percentiles in the global environment when called.
model_metrics <- model_metrics_df_afr_multiancestry_score
ntile <- df_ntile_norm_wide_afr_multiancestry_score
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# For one metric: run equiv_scores() and create_binary_df() once, then call
# calculate_and_kappa() for every percentile and row-bind the results.
process_list <- function(metric) {
  list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
  df_ntile_top <- create_binary_df(ntile, list_data, percentiles)
  per_percentile <- lapply(percentiles, function(p) {
    calculate_and_kappa(df_ntile_top, p, list_data, metric)
  })
  do.call(rbind, per_percentile)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
write.table(
  outcome,
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

combined_kappa_results_afr_multiancestry_score <- outcome

#### stratified by clinical covariates

##### T2D

In [None]:
# Kappa agreement table: AFR individuals, multiancestry scores, T2D cases
# stratum (t2d_case).
# All objects are assigned at top level on purpose: process_list() looks up
# model_metrics, ntile and percentiles in the global environment when called.
model_metrics <- model_metrics_df_afr_multiancestry_score
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
filter_df <- t2d_case
# Keep only the individuals whose IID appears in the stratum table.
ntile <- df_ntile_norm_wide_afr_multiancestry_score %>%
  filter(IID %in% filter_df$IID)
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# For one metric: run equiv_scores() and create_binary_df() once, then call
# calculate_and_kappa() for every percentile and row-bind the results.
process_list <- function(metric) {
  list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
  df_ntile_top <- create_binary_df(ntile, list_data, percentiles)
  per_percentile <- lapply(percentiles, function(p) {
    calculate_and_kappa(df_ntile_top, p, list_data, metric)
  })
  do.call(rbind, per_percentile)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
write.table(
  outcome,
  "AOU.AFR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

combined_kappa_results_afr_multiancestry_score_t2d_case <- outcome

In [None]:
# Kappa agreement table: AFR individuals, multiancestry scores, T2D controls
# stratum (t2d_control).
# All objects are assigned at top level on purpose: process_list() looks up
# model_metrics, ntile and percentiles in the global environment when called.
model_metrics <- model_metrics_df_afr_multiancestry_score
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
filter_df <- t2d_control
# Keep only the individuals whose IID appears in the stratum table.
ntile <- df_ntile_norm_wide_afr_multiancestry_score %>%
  filter(IID %in% filter_df$IID)
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# For one metric: run equiv_scores() and create_binary_df() once, then call
# calculate_and_kappa() for every percentile and row-bind the results.
process_list <- function(metric) {
  list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
  df_ntile_top <- create_binary_df(ntile, list_data, percentiles)
  per_percentile <- lapply(percentiles, function(p) {
    calculate_and_kappa(df_ntile_top, p, list_data, metric)
  })
  do.call(rbind, per_percentile)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
write.table(
  outcome,
  "AOU.AFR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

combined_kappa_results_afr_multiancestry_score_t2d_control <- outcome

##### HT YK

In [None]:
# Kappa agreement table: AFR individuals, multiancestry scores, HT YK cases
# stratum (ht_yk_case).
# All objects are assigned at top level on purpose: process_list() looks up
# model_metrics, ntile and percentiles in the global environment when called.
model_metrics <- model_metrics_df_afr_multiancestry_score
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
filter_df <- ht_yk_case
# Keep only the individuals whose IID appears in the stratum table.
ntile <- df_ntile_norm_wide_afr_multiancestry_score %>%
  filter(IID %in% filter_df$IID)
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# For one metric: run equiv_scores() and create_binary_df() once, then call
# calculate_and_kappa() for every percentile and row-bind the results.
process_list <- function(metric) {
  list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
  df_ntile_top <- create_binary_df(ntile, list_data, percentiles)
  per_percentile <- lapply(percentiles, function(p) {
    calculate_and_kappa(df_ntile_top, p, list_data, metric)
  })
  do.call(rbind, per_percentile)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
write.table(
  outcome,
  "AOU.AFR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

combined_kappa_results_afr_multiancestry_score_ht_yk_case <- outcome

In [None]:
# Kappa agreement table: AFR individuals, multiancestry scores, HT YK
# controls stratum (ht_yk_control).
# All objects are assigned at top level on purpose: process_list() looks up
# model_metrics, ntile and percentiles in the global environment when called.
model_metrics <- model_metrics_df_afr_multiancestry_score
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
filter_df <- ht_yk_control
# Keep only the individuals whose IID appears in the stratum table.
ntile <- df_ntile_norm_wide_afr_multiancestry_score %>%
  filter(IID %in% filter_df$IID)
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# For one metric: run equiv_scores() and create_binary_df() once, then call
# calculate_and_kappa() for every percentile and row-bind the results.
process_list <- function(metric) {
  list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
  df_ntile_top <- create_binary_df(ntile, list_data, percentiles)
  per_percentile <- lapply(percentiles, function(p) {
    calculate_and_kappa(df_ntile_top, p, list_data, metric)
  })
  do.call(rbind, per_percentile)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
write.table(
  outcome,
  "AOU.AFR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

combined_kappa_results_afr_multiancestry_score_ht_yk_control <- outcome

##### Age

In [None]:
# Kappa agreement table: AFR individuals, multiancestry scores, age < 60
# stratum (age_less_60).
# All objects are assigned at top level on purpose: process_list() looks up
# model_metrics, ntile and percentiles in the global environment when called.
model_metrics <- model_metrics_df_afr_multiancestry_score
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
filter_df <- age_less_60
# Keep only the individuals whose IID appears in the stratum table.
ntile <- df_ntile_norm_wide_afr_multiancestry_score %>%
  filter(IID %in% filter_df$IID)
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# For one metric: run equiv_scores() and create_binary_df() once, then call
# calculate_and_kappa() for every percentile and row-bind the results.
process_list <- function(metric) {
  list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
  df_ntile_top <- create_binary_df(ntile, list_data, percentiles)
  per_percentile <- lapply(percentiles, function(p) {
    calculate_and_kappa(df_ntile_top, p, list_data, metric)
  })
  do.call(rbind, per_percentile)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
write.table(
  outcome,
  "AOU.AFR.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

combined_kappa_results_afr_multiancestry_score_age_less_60 <- outcome

In [None]:
# Kappa agreement table: AFR individuals, multiancestry scores, age >= 60
# stratum (age_greater_equal_60).
# All objects are assigned at top level on purpose: process_list() looks up
# model_metrics, ntile and percentiles in the global environment when called.
model_metrics <- model_metrics_df_afr_multiancestry_score
metrics <- c("prob_dif", "ROPE_005", "ROPE_01", "ROPE_02")
filter_df <- age_greater_equal_60
# Keep only the individuals whose IID appears in the stratum table.
ntile <- df_ntile_norm_wide_afr_multiancestry_score %>%
  filter(IID %in% filter_df$IID)
percentiles <- c(99, 98, 95, 90, 80, 70, 50)

# For one metric: run equiv_scores() and create_binary_df() once, then call
# calculate_and_kappa() for every percentile and row-bind the results.
process_list <- function(metric) {
  list_data <- equiv_scores(model_metrics, metric, ntile = TRUE)
  df_ntile_top <- create_binary_df(ntile, list_data, percentiles)
  per_percentile <- lapply(percentiles, function(p) {
    calculate_and_kappa(df_ntile_top, p, list_data, metric)
  })
  do.call(rbind, per_percentile)
}

kappa_results <- lapply(metrics, process_list)
outcome <- do.call(rbind, kappa_results)

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
write.table(
  outcome,
  "AOU.AFR.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)

combined_kappa_results_afr_multiancestry_score_age_greater_equal_60 <- outcome

## make linegraph

### read in input files (if needed)

#### all

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, unstratified.
combined_kappa_results <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, T2D cases.
combined_kappa_results_t2d_case <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, T2D controls.
combined_kappa_results_t2d_control <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, HT YK cases.
combined_kappa_results_ht_yk_case <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, HT YK controls.
combined_kappa_results_ht_yk_control <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, age < 60.
combined_kappa_results_age_less_60 <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, age >= 60.
combined_kappa_results_age_greater_equal_60 <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

#### all individuals multiancestry scores

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, multiancestry
# scores, unstratified.
combined_kappa_results_multiancestry_score <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, multiancestry
# scores, T2D cases.
combined_kappa_results_multiancestry_score_t2d_case <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, multiancestry
# scores, T2D controls.
combined_kappa_results_multiancestry_score_t2d_control <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, multiancestry
# scores, HT YK cases.
combined_kappa_results_multiancestry_score_ht_yk_case <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, multiancestry
# scores, HT YK controls.
combined_kappa_results_multiancestry_score_ht_yk_control <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, multiancestry
# scores, age < 60.
combined_kappa_results_multiancestry_score_age_less_60 <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: ALL individuals, multiancestry
# scores, age >= 60.
combined_kappa_results_multiancestry_score_age_greater_equal_60 <- read.delim(
  "AOU.ALL.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

#### eur

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, unstratified.
combined_kappa_results_eur <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, T2D cases.
combined_kappa_results_eur_t2d_case <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, T2D controls.
combined_kappa_results_eur_t2d_control <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, HT YK cases.
combined_kappa_results_eur_ht_yk_case <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, HT YK controls.
combined_kappa_results_eur_ht_yk_control <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, age < 60.
combined_kappa_results_eur_age_less_60 <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, age >= 60.
combined_kappa_results_eur_age_greater_equal_60 <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

#### eur multiancestry scores

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, multiancestry
# scores, unstratified.
combined_kappa_results_eur_multiancestry_score <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, multiancestry
# scores, T2D cases.
combined_kappa_results_eur_multiancestry_score_t2d_case <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, multiancestry
# scores, T2D controls.
combined_kappa_results_eur_multiancestry_score_t2d_control <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, multiancestry
# scores, HT YK cases.
combined_kappa_results_eur_multiancestry_score_ht_yk_case <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, multiancestry
# scores, HT YK controls.
combined_kappa_results_eur_multiancestry_score_ht_yk_control <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, multiancestry
# scores, age < 60.
combined_kappa_results_eur_multiancestry_score_age_less_60 <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: EUR individuals, multiancestry
# scores, age >= 60.
combined_kappa_results_eur_multiancestry_score_age_greater_equal_60 <- read.delim(
  "AOU.EUR.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

#### afr

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, unstratified.
combined_kappa_results_afr <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, T2D cases.
combined_kappa_results_afr_t2d_case <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, T2D controls.
combined_kappa_results_afr_t2d_control <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, HT YK cases.
combined_kappa_results_afr_ht_yk_case <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, HT YK controls.
combined_kappa_results_afr_ht_yk_control <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, age < 60.
combined_kappa_results_afr_age_less_60 <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, age >= 60.
combined_kappa_results_afr_age_greater_equal_60 <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.individual_agreement.LightsKappa.txt"
)

#### afr multiancestry scores

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, multiancestry
# scores, unstratified.
combined_kappa_results_afr_multiancestry_score <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, multiancestry
# scores, T2D cases.
combined_kappa_results_afr_multiancestry_score_t2d_case <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.T2D_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, multiancestry
# scores, T2D controls.
combined_kappa_results_afr_multiancestry_score_t2d_control <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.T2D_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, multiancestry
# scores, HT YK cases.
combined_kappa_results_afr_multiancestry_score_ht_yk_case <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.HT_YK_cases.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, multiancestry
# scores, HT YK controls.
combined_kappa_results_afr_multiancestry_score_ht_yk_control <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.HT_YK_controls.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, multiancestry
# scores, age < 60.
combined_kappa_results_afr_multiancestry_score_age_less_60 <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.age_less_60.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

In [None]:
# Reload the saved tab-separated kappa table: AFR individuals, multiancestry
# scores, age >= 60.
combined_kappa_results_afr_multiancestry_score_age_greater_equal_60 <- read.delim(
  "AOU.AFR.CKD.YK_Phenotyping.age_greater_equal_60.PGS_Znorm2.multiancestry_scores.individual_agreement.LightsKappa.txt"
)

### merge

#### no stratification

In [None]:
# Combine the six unstratified kappa tables into one wide table keyed by
# (metric, percentile). Each join brings in a fresh `kappa_value` column,
# which is renamed immediately so the next join does not collide with it.
kappa_merge <- combined_kappa_results_afr_multiancestry_score %>%
  rename(AFR_indiv.multiancestry_scores = kappa_value) %>%
  left_join(
    combined_kappa_results_afr,
    by = c("metric", "percentile")
  ) %>%
  rename(AFR_indiv.all_scores = kappa_value) %>%
  left_join(
    combined_kappa_results_eur_multiancestry_score,
    by = c("metric", "percentile")
  ) %>%
  rename(EUR_indiv.multiancestry_scores = kappa_value) %>%
  left_join(
    combined_kappa_results_eur,
    by = c("metric", "percentile")
  ) %>%
  rename(EUR_indiv.all_scores = kappa_value) %>%
  left_join(
    combined_kappa_results_multiancestry_score,
    by = c("metric", "percentile")
  ) %>%
  rename(ALL_indiv.multiancestry_scores = kappa_value) %>%
  left_join(
    combined_kappa_results,
    by = c("metric", "percentile")
  ) %>%
  rename(ALL_indiv.all_scores = kappa_value) %>%
  # Relabel the metric codes; the replacements run in the order written.
  mutate(
    metric = gsub("prob_dif", "Statistically_Equivalent", metric),
    metric = gsub("ROPE_005", "ROPE_0.005", metric),
    metric = gsub("ROPE_01", "ROPE_0.01", metric),
    metric = gsub("ROPE_02", "ROPE_0.02", metric)
  )
# Show rows x columns. The previous `print(kappa_merge.shape)` was pandas
# syntax: in R it refers to an object literally named `kappa_merge.shape`,
# which this cell never creates.
dim(kappa_merge)

#### stratified by clinical covariates

##### all

In [None]:
# Wide kappa table for ALL individuals: one column per clinical stratum,
# keyed by (metric, percentile). Each joined `kappa_value` is renamed right
# after its join so the next join does not collide with it.
kappa_merge_clinical <- combined_kappa_results %>%
  rename(`No Stratification` = kappa_value) %>%
  left_join(
    combined_kappa_results_t2d_case,
    by = c("metric", "percentile")
  ) %>%
  rename(`T2D cases` = kappa_value) %>%
  left_join(
    combined_kappa_results_t2d_control,
    by = c("metric", "percentile")
  ) %>%
  rename(`T2D controls` = kappa_value) %>%
  left_join(
    combined_kappa_results_ht_yk_case,
    by = c("metric", "percentile")
  ) %>%
  rename(`HT cases` = kappa_value) %>%
  left_join(
    combined_kappa_results_ht_yk_control,
    by = c("metric", "percentile")
  ) %>%
  rename(`HT controls` = kappa_value) %>%
  left_join(
    combined_kappa_results_age_less_60,
    by = c("metric", "percentile")
  ) %>%
  rename(`Age < 60` = kappa_value) %>%
  left_join(
    combined_kappa_results_age_greater_equal_60,
    by = c("metric", "percentile")
  ) %>%
  rename(`Age >= 60` = kappa_value) %>%
  # Relabel the metric codes; the replacements run in the order written.
  mutate(
    metric = gsub("prob_dif", "Statistically_Equivalent", metric),
    metric = gsub("ROPE_005", "ROPE_0.005", metric),
    metric = gsub("ROPE_01", "ROPE_0.01", metric),
    metric = gsub("ROPE_02", "ROPE_0.02", metric)
  )
dim(kappa_merge_clinical)

##### all individuals multiancestry scores

In [None]:
# Wide kappa table for ALL individuals, multiancestry scores: one column per
# clinical stratum, keyed by (metric, percentile). Each joined `kappa_value`
# is renamed right after its join so the next join does not collide with it.
kappa_merge_multiancestry_score_clinical <-
  combined_kappa_results_multiancestry_score %>%
  rename(`No Stratification` = kappa_value) %>%
  left_join(
    combined_kappa_results_multiancestry_score_t2d_case,
    by = c("metric", "percentile")
  ) %>%
  rename(`T2D cases` = kappa_value) %>%
  left_join(
    combined_kappa_results_multiancestry_score_t2d_control,
    by = c("metric", "percentile")
  ) %>%
  rename(`T2D controls` = kappa_value) %>%
  left_join(
    combined_kappa_results_multiancestry_score_ht_yk_case,
    by = c("metric", "percentile")
  ) %>%
  rename(`HT cases` = kappa_value) %>%
  left_join(
    combined_kappa_results_multiancestry_score_ht_yk_control,
    by = c("metric", "percentile")
  ) %>%
  rename(`HT controls` = kappa_value) %>%
  left_join(
    combined_kappa_results_multiancestry_score_age_less_60,
    by = c("metric", "percentile")
  ) %>%
  rename(`Age < 60` = kappa_value) %>%
  left_join(
    combined_kappa_results_multiancestry_score_age_greater_equal_60,
    by = c("metric", "percentile")
  ) %>%
  rename(`Age >= 60` = kappa_value) %>%
  # Relabel the metric codes; the replacements run in the order written.
  mutate(
    metric = gsub("prob_dif", "Statistically_Equivalent", metric),
    metric = gsub("ROPE_005", "ROPE_0.005", metric),
    metric = gsub("ROPE_01", "ROPE_0.01", metric),
    metric = gsub("ROPE_02", "ROPE_0.02", metric)
  )
dim(kappa_merge_multiancestry_score_clinical)

##### eur

In [None]:
# Wide kappa table for EUR individuals: one column per clinical stratum,
# keyed by (metric, percentile). Each joined `kappa_value` is renamed right
# after its join so the next join does not collide with it.
kappa_merge_eur_clinical <- combined_kappa_results_eur %>%
  rename(`No Stratification` = kappa_value) %>%
  left_join(
    combined_kappa_results_eur_t2d_case,
    by = c("metric", "percentile")
  ) %>%
  rename(`T2D cases` = kappa_value) %>%
  left_join(
    combined_kappa_results_eur_t2d_control,
    by = c("metric", "percentile")
  ) %>%
  rename(`T2D controls` = kappa_value) %>%
  left_join(
    combined_kappa_results_eur_ht_yk_case,
    by = c("metric", "percentile")
  ) %>%
  rename(`HT cases` = kappa_value) %>%
  left_join(
    combined_kappa_results_eur_ht_yk_control,
    by = c("metric", "percentile")
  ) %>%
  rename(`HT controls` = kappa_value) %>%
  left_join(
    combined_kappa_results_eur_age_less_60,
    by = c("metric", "percentile")
  ) %>%
  rename(`Age < 60` = kappa_value) %>%
  left_join(
    combined_kappa_results_eur_age_greater_equal_60,
    by = c("metric", "percentile")
  ) %>%
  rename(`Age >= 60` = kappa_value) %>%
  # Relabel the metric codes; the replacements run in the order written.
  mutate(
    metric = gsub("prob_dif", "Statistically_Equivalent", metric),
    metric = gsub("ROPE_005", "ROPE_0.005", metric),
    metric = gsub("ROPE_01", "ROPE_0.01", metric),
    metric = gsub("ROPE_02", "ROPE_0.02", metric)
  )
dim(kappa_merge_eur_clinical)

##### eur individuals multiancestry scores

In [None]:
# Wide kappa table for EUR individuals, multiancestry scores: one column per
# clinical stratum, keyed by (metric, percentile). Each joined `kappa_value`
# is renamed right after its join so the next join does not collide with it.
kappa_merge_eur_multiancestry_score_clinical <-
  combined_kappa_results_eur_multiancestry_score %>%
  rename(`No Stratification` = kappa_value) %>%
  left_join(
    combined_kappa_results_eur_multiancestry_score_t2d_case,
    by = c("metric", "percentile")
  ) %>%
  rename(`T2D cases` = kappa_value) %>%
  left_join(
    combined_kappa_results_eur_multiancestry_score_t2d_control,
    by = c("metric", "percentile")
  ) %>%
  rename(`T2D controls` = kappa_value) %>%
  left_join(
    combined_kappa_results_eur_multiancestry_score_ht_yk_case,
    by = c("metric", "percentile")
  ) %>%
  rename(`HT cases` = kappa_value) %>%
  left_join(
    combined_kappa_results_eur_multiancestry_score_ht_yk_control,
    by = c("metric", "percentile")
  ) %>%
  rename(`HT controls` = kappa_value) %>%
  left_join(
    combined_kappa_results_eur_multiancestry_score_age_less_60,
    by = c("metric", "percentile")
  ) %>%
  rename(`Age < 60` = kappa_value) %>%
  left_join(
    combined_kappa_results_eur_multiancestry_score_age_greater_equal_60,
    by = c("metric", "percentile")
  ) %>%
  rename(`Age >= 60` = kappa_value) %>%
  # Relabel the metric codes; the replacements run in the order written.
  mutate(
    metric = gsub("prob_dif", "Statistically_Equivalent", metric),
    metric = gsub("ROPE_005", "ROPE_0.005", metric),
    metric = gsub("ROPE_01", "ROPE_0.01", metric),
    metric = gsub("ROPE_02", "ROPE_0.02", metric)
  )
dim(kappa_merge_eur_multiancestry_score_clinical)

##### afr

In [None]:
# Wide kappa table for AFR individuals: one column per clinical stratum,
# keyed by (metric, percentile). Each joined `kappa_value` is renamed right
# after its join so the next join does not collide with it.
kappa_merge_afr_clinical <- combined_kappa_results_afr %>%
  rename(`No Stratification` = kappa_value) %>%
  left_join(
    combined_kappa_results_afr_t2d_case,
    by = c("metric", "percentile")
  ) %>%
  rename(`T2D cases` = kappa_value) %>%
  left_join(
    combined_kappa_results_afr_t2d_control,
    by = c("metric", "percentile")
  ) %>%
  rename(`T2D controls` = kappa_value) %>%
  left_join(
    combined_kappa_results_afr_ht_yk_case,
    by = c("metric", "percentile")
  ) %>%
  rename(`HT cases` = kappa_value) %>%
  left_join(
    combined_kappa_results_afr_ht_yk_control,
    by = c("metric", "percentile")
  ) %>%
  rename(`HT controls` = kappa_value) %>%
  left_join(
    combined_kappa_results_afr_age_less_60,
    by = c("metric", "percentile")
  ) %>%
  rename(`Age < 60` = kappa_value) %>%
  left_join(
    combined_kappa_results_afr_age_greater_equal_60,
    by = c("metric", "percentile")
  ) %>%
  rename(`Age >= 60` = kappa_value) %>%
  # Relabel the metric codes; the replacements run in the order written.
  mutate(
    metric = gsub("prob_dif", "Statistically_Equivalent", metric),
    metric = gsub("ROPE_005", "ROPE_0.005", metric),
    metric = gsub("ROPE_01", "ROPE_0.01", metric),
    metric = gsub("ROPE_02", "ROPE_0.02", metric)
  )
dim(kappa_merge_afr_clinical)

##### afr individuals multiancestry scores

In [None]:
# Wide kappa table for AFR individuals, multiancestry scores: one column per
# clinical stratum, keyed by (metric, percentile). Each joined `kappa_value`
# is renamed right after its join so the next join does not collide with it.
kappa_merge_afr_multiancestry_score_clinical <-
  combined_kappa_results_afr_multiancestry_score %>%
  rename(`No Stratification` = kappa_value) %>%
  left_join(
    combined_kappa_results_afr_multiancestry_score_t2d_case,
    by = c("metric", "percentile")
  ) %>%
  rename(`T2D cases` = kappa_value) %>%
  left_join(
    combined_kappa_results_afr_multiancestry_score_t2d_control,
    by = c("metric", "percentile")
  ) %>%
  rename(`T2D controls` = kappa_value) %>%
  left_join(
    combined_kappa_results_afr_multiancestry_score_ht_yk_case,
    by = c("metric", "percentile")
  ) %>%
  rename(`HT cases` = kappa_value) %>%
  left_join(
    combined_kappa_results_afr_multiancestry_score_ht_yk_control,
    by = c("metric", "percentile")
  ) %>%
  rename(`HT controls` = kappa_value) %>%
  left_join(
    combined_kappa_results_afr_multiancestry_score_age_less_60,
    by = c("metric", "percentile")
  ) %>%
  rename(`Age < 60` = kappa_value) %>%
  left_join(
    combined_kappa_results_afr_multiancestry_score_age_greater_equal_60,
    by = c("metric", "percentile")
  ) %>%
  rename(`Age >= 60` = kappa_value) %>%
  # Relabel the metric codes; the replacements run in the order written.
  mutate(
    metric = gsub("prob_dif", "Statistically_Equivalent", metric),
    metric = gsub("ROPE_005", "ROPE_0.005", metric),
    metric = gsub("ROPE_01", "ROPE_0.01", metric),
    metric = gsub("ROPE_02", "ROPE_0.02", metric)
  )
dim(kappa_merge_afr_multiancestry_score_clinical)

### convert to long form

#### no stratification

In [None]:
# Long form of the unstratified kappa table: every column whose name ends in
# "_scores" becomes a (score_type, score_value) pair.
kappa_merge_long <- kappa_merge %>%
  pivot_longer(
    cols = ends_with("_scores"),
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  mutate(
    # Fix the facet order of the metrics.
    metric = factor(
      metric,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    ),
    score_type = gsub("multiancestry", "eur_multiancestry", score_type)
  )
head(kappa_merge_long)

#### stratified by clinical covariates

In [None]:
# Long form of the stratified kappa table: every column except the two keys
# becomes a (score_type, score_value) pair.
kappa_merge_clinical_long <- kappa_merge_clinical %>%
  pivot_longer(
    cols = !c(metric, percentile),
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  mutate(
    # Fix the facet order of the metrics.
    metric = factor(
      metric,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    )
  )
head(kappa_merge_clinical_long)

In [None]:
# Long form of the stratified kappa table for the multiancestry-score results:
# every column except the two keys becomes a (score_type, score_value) pair.
kappa_merge_multiancestry_score_clinical_long <-
  kappa_merge_multiancestry_score_clinical %>%
  pivot_longer(
    cols = !c(metric, percentile),
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  mutate(
    # Fix the facet order of the metrics.
    metric = factor(
      metric,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    )
  )
head(kappa_merge_multiancestry_score_clinical_long)

In [None]:
# Long form of the EUR stratified kappa table: every column except the two
# keys becomes a (score_type, score_value) pair.
kappa_merge_eur_clinical_long <- kappa_merge_eur_clinical %>%
  pivot_longer(
    cols = !c(metric, percentile),
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  mutate(
    # Fix the facet order of the metrics.
    metric = factor(
      metric,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    )
  )
head(kappa_merge_eur_clinical_long)

In [None]:
# Long form of the EUR stratified kappa table for the multiancestry-score
# results: every column except the two keys becomes a
# (score_type, score_value) pair.
kappa_merge_eur_multiancestry_score_clinical_long <-
  kappa_merge_eur_multiancestry_score_clinical %>%
  pivot_longer(
    cols = !c(metric, percentile),
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  mutate(
    # Fix the facet order of the metrics.
    metric = factor(
      metric,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    )
  )
head(kappa_merge_eur_multiancestry_score_clinical_long)

In [None]:
# Long form of the AFR stratified kappa table: every column except the two
# keys becomes a (score_type, score_value) pair.
kappa_merge_afr_clinical_long <- kappa_merge_afr_clinical %>%
  pivot_longer(
    cols = !c(metric, percentile),
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  mutate(
    # Fix the facet order of the metrics.
    metric = factor(
      metric,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    )
  )
head(kappa_merge_afr_clinical_long)

In [None]:
# Long form of the AFR stratified kappa table for the multiancestry-score
# results: every column except the two keys becomes a
# (score_type, score_value) pair.
kappa_merge_afr_multiancestry_score_clinical_long <-
  kappa_merge_afr_multiancestry_score_clinical %>%
  pivot_longer(
    cols = !c(metric, percentile),
    names_to = "score_type",
    values_to = "score_value"
  ) %>%
  mutate(
    # Fix the facet order of the metrics.
    metric = factor(
      metric,
      levels = c("Statistically_Equivalent", "ROPE_0.005", "ROPE_0.01", "ROPE_0.02")
    )
  )
head(kappa_merge_afr_multiancestry_score_clinical_long)

### make linegraph

In [None]:
# Inline plot size used by the notebook for the figures that follow.
options(
  repr.plot.width = 12,
  repr.plot.height = 5
)

#### no stratification

In [None]:
# Line graph of kappa against percentile for the unstratified analysis:
# one panel per metric, one coloured line per score type.
kappa_plot <- ggplot(
  kappa_merge_long,
  aes(x = percentile, y = score_value, color = score_type)
) +
  geom_point() +
  # `linewidth` is the line-thickness argument since ggplot2 3.4.0; `size`
  # is deprecated for lines and raises a warning.
  geom_line(linewidth = 1) +
  facet_wrap(~ metric, nrow = 1) +
  labs(title = "AOU CKD PGS Light's Kappa",
       x = "Percentile",
       y = "Light's Kappa",
       color = "Score") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

kappa_plot
# Name the plot explicitly so the saved file does not depend on whichever
# plot ggplot2 last recorded.
ggsave('AOU.CKD.YK_Phenotyping.PGS_Znorm2.individual_agreement.LightsKappa.line_graph.png',
       plot = kappa_plot,
       width = 12,
       height = 3,
       dpi = 300,
       bg = "white")

#### stratified by clinical covariates

##### all

In [None]:
# Line graph of kappa against percentile for all individuals, stratified by
# clinical covariates: one panel per metric, one coloured line per stratum.
kappa_plot <- ggplot(
  kappa_merge_clinical_long,
  aes(x = percentile, y = score_value, color = score_type)
) +
  geom_point() +
  # `linewidth` is the line-thickness argument since ggplot2 3.4.0; `size`
  # is deprecated for lines and raises a warning.
  geom_line(linewidth = 1) +
  facet_wrap(~ metric, nrow = 1) +
  labs(title = "AOU ALL CKD PGS Light's Kappa",
       subtitle = 'Stratified by Clinical Covariates',
       x = "Percentile",
       y = "Light's Kappa",
       color = "Stratification") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

kappa_plot
# Name the plot explicitly so the saved file does not depend on whichever
# plot ggplot2 last recorded.
ggsave('AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.clinical_covariate_stratification.T2D.HT_YK.AGE.individual_agreement.LightsKappa.line_graph.png',
       plot = kappa_plot,
       width = 12,
       height = 5,
       dpi = 300,
       bg = "white")

##### all individuals multiancestry scores

In [None]:
# Inline plot size used by the notebook for the figures that follow.
options(
  repr.plot.width = 9,
  repr.plot.height = 5
)

In [None]:
# Line graph of kappa against percentile for all individuals, multiancestry-
# score results, stratified by clinical covariates: one panel per metric, one
# coloured line per stratum.
kappa_plot <- ggplot(
  kappa_merge_multiancestry_score_clinical_long,
  aes(x = percentile, y = score_value, color = score_type)
) +
  geom_point() +
  # `linewidth` is the line-thickness argument since ggplot2 3.4.0; `size`
  # is deprecated for lines and raises a warning.
  geom_line(linewidth = 1) +
  facet_wrap(~ metric, nrow = 1) +
  labs(title = "AOU ALL CKD PGS Light's Kappa- EUR/Multiancestry Scores Only",
       subtitle = 'Stratified by Clinical Covariates',
       x = "Percentile",
       y = "Light's Kappa",
       color = "Stratification") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

kappa_plot
# Name the plot explicitly so the saved file does not depend on whichever
# plot ggplot2 last recorded.
ggsave('AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.clinical_covariate_stratification.T2D.HT_YK.AGE.individual_agreement.LightsKappa.line_graph.png',
       plot = kappa_plot,
       width = 9,
       height = 5,
       dpi = 300,
       bg = "white")

##### eur

In [None]:
# Inline plot size used by the notebook for the figures that follow.
options(
  repr.plot.width = 6,
  repr.plot.height = 5
)

In [None]:
# Line graph of kappa against percentile for EUR individuals, stratified by
# clinical covariates: one panel per metric, one coloured line per stratum.
kappa_plot <- ggplot(
  kappa_merge_eur_clinical_long,
  aes(x = percentile, y = score_value, color = score_type)
) +
  geom_point() +
  # `linewidth` is the line-thickness argument since ggplot2 3.4.0; `size`
  # is deprecated for lines and raises a warning.
  geom_line(linewidth = 1) +
  facet_wrap(~ metric, nrow = 1) +
  labs(title = "AOU EUR CKD PGS Light's Kappa",
       subtitle = 'Stratified by Clinical Covariates',
       x = "Percentile",
       y = "Light's Kappa",
       color = "Stratification") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

kappa_plot
# Name the plot explicitly so the saved file does not depend on whichever
# plot ggplot2 last recorded.
ggsave('AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.clinical_covariate_stratification.T2D.HT_YK.AGE.individual_agreement.LightsKappa.line_graph.png',
       plot = kappa_plot,
       width = 6,
       height = 5,
       dpi = 300,
       bg = "white")

##### eur individuals multiancestry scores

In [None]:
# Line graph of kappa against percentile for EUR individuals, multiancestry-
# score results, stratified by clinical covariates: one panel per metric, one
# coloured line per stratum.
kappa_plot <- ggplot(
  kappa_merge_eur_multiancestry_score_clinical_long,
  aes(x = percentile, y = score_value, color = score_type)
) +
  geom_point() +
  # `linewidth` is the line-thickness argument since ggplot2 3.4.0; `size`
  # is deprecated for lines and raises a warning.
  geom_line(linewidth = 1) +
  facet_wrap(~ metric, nrow = 1) +
  labs(title = "AOU EUR CKD PGS Light's Kappa- EUR/Multiancestry Scores Only",
       subtitle = 'Stratified by Clinical Covariates',
       x = "Percentile",
       y = "Light's Kappa",
       color = "Stratification") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

kappa_plot
# Name the plot explicitly so the saved file does not depend on whichever
# plot ggplot2 last recorded.
ggsave('AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.clinical_covariate_stratification.T2D.HT_YK.AGE.individual_agreement.LightsKappa.line_graph.png',
       plot = kappa_plot,
       width = 6,
       height = 5,
       dpi = 300,
       bg = "white")

##### afr

In [None]:
# Inline plot size used by the notebook for the figures that follow.
options(
  repr.plot.width = 15,
  repr.plot.height = 5
)

In [None]:
# Line graph of kappa against percentile for AFR individuals, stratified by
# clinical covariates: one panel per metric, one coloured line per stratum.
kappa_plot <- ggplot(
  kappa_merge_afr_clinical_long,
  aes(x = percentile, y = score_value, color = score_type)
) +
  geom_point() +
  # `linewidth` is the line-thickness argument since ggplot2 3.4.0; `size`
  # is deprecated for lines and raises a warning.
  geom_line(linewidth = 1) +
  facet_wrap(~ metric, nrow = 1) +
  labs(title = "AOU AFR CKD PGS Light's Kappa",
       subtitle = 'Stratified by Clinical Covariates',
       x = "Percentile",
       y = "Light's Kappa",
       color = "Stratification") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

kappa_plot
# Name the plot explicitly so the saved file does not depend on whichever
# plot ggplot2 last recorded.
ggsave('AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.clinical_covariate_stratification.T2D.HT_YK.AGE.individual_agreement.LightsKappa.line_graph.png',
       plot = kappa_plot,
       width = 15,
       height = 5,
       dpi = 300,
       bg = "white")

##### afr individuals multiancestry scores

In [None]:
# Line graph of kappa against percentile for AFR individuals, multiancestry-
# score results, stratified by clinical covariates: one panel per metric, one
# coloured line per stratum.
kappa_plot <- ggplot(
  kappa_merge_afr_multiancestry_score_clinical_long,
  aes(x = percentile, y = score_value, color = score_type)
) +
  geom_point() +
  # `linewidth` is the line-thickness argument since ggplot2 3.4.0; `size`
  # is deprecated for lines and raises a warning.
  geom_line(linewidth = 1) +
  facet_wrap(~ metric, nrow = 1) +
  labs(title = "AOU AFR CKD PGS Light's Kappa- EUR/Multiancestry Scores Only",
       subtitle = 'Stratified by Clinical Covariates',
       x = "Percentile",
       y = "Light's Kappa",
       color = "Stratification") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

kappa_plot
# Name the plot explicitly so the saved file does not depend on whichever
# plot ggplot2 last recorded.
ggsave('AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.multiancestry_scores.clinical_covariate_stratification.T2D.HT_YK.AGE.individual_agreement.LightsKappa.line_graph.png',
       plot = kappa_plot,
       width = 15,
       height = 5,
       dpi = 300,
       bg = "white")

# pairwise correlations between score percentile distributions

## read in percentile file (if needed)

### all

In [None]:
# Re-load the wide-form individual percentile table for the ALL cohort
# (tab-separated) and show its first rows and column names.
df_ntile_norm_wide <- fread(
  'AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt',
  sep = '\t'
)
head(df_ntile_norm_wide)
colnames(df_ntile_norm_wide)

In [None]:
# Re-load the tab-separated model performance metrics for the ALL cohort.
# read.delim() is read.csv() with a tab as the default separator.
model_metrics_df <- read.delim(
  'AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)
head(model_metrics_df)

### eur

In [None]:
# Re-load the wide-form individual percentile table for the EUR cohort
# (tab-separated) and show its first rows and column names.
df_ntile_norm_wide_eur <- fread(
  'AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt',
  sep = '\t'
)
head(df_ntile_norm_wide_eur)
colnames(df_ntile_norm_wide_eur)

In [None]:
# Re-load the tab-separated model performance metrics for the EUR cohort.
# read.delim() is read.csv() with a tab as the default separator.
model_metrics_df_eur <- read.delim(
  'AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)
head(model_metrics_df_eur)

### afr

In [None]:
# Re-load the wide-form individual percentile table for the AFR cohort
# (tab-separated) and show its first rows and column names.
df_ntile_norm_wide_afr <- fread(
  'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile.wide_form.txt',
  sep = '\t'
)
head(df_ntile_norm_wide_afr)
colnames(df_ntile_norm_wide_afr)

In [None]:
# Re-load the tab-separated model performance metrics for the AFR cohort.
# read.delim() is read.csv() with a tab as the default separator.
model_metrics_df_afr <- read.delim(
  'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.cv_glm.perf_model.performance_metrics.txt'
)
head(model_metrics_df_afr)

## create pgs list

In [None]:
# Distinct model names from the ALL-cohort metrics table; this list drives the
# pairwise correlation loops for every cohort below.
pgs_include <- model_metrics_df$model %>%
  unique()
length(pgs_include)

## calculate correlations

### all

In [None]:
# calculate R for all pairs
# Keep only the percentile columns and strip the "ntile_" prefix so that the
# column names are the bare score IDs.
all_correlations <- df_ntile_norm_wide %>%
  select(starts_with("ntile_")) %>%
  rename_with(~ gsub("ntile_", "", .), .cols = everything())

# Correlate one score's percentile column with every percentile column.
#
# pgs_list: a single score ID; must be a column name of `score_df`.
# score_df: table of percentile columns, one per score. Defaults to
#           `all_correlations`, so one-argument calls keep working.
# Returns a long data frame with one row per column of `score_df`:
#   model  - column name, r - correlation (Pearson, the cor() default),
#   PGS_ID - the score passed in as `pgs_list`.
calculate_correlations <- function(pgs_list, score_df = all_correlations) {
  pgs_list <- as.character(pgs_list)
  stopifnot(length(pgs_list) == 1L)
  if (!pgs_list %in% names(score_df)) {
    stop("No percentile column named '", pgs_list, "' in score_df", call. = FALSE)
  }

  # `[[` extracts the column from a data.table, tibble or data.frame alike;
  # the data.table-only `..name` prefix fails on anything not read by fread().
  # The result is a 1 x ncol(score_df) matrix named by the columns of score_df.
  correlation_coefficients <- cor(score_df[[pgs_list]], score_df)
  correlation_df <- as.data.frame(correlation_coefficients) %>%
    pivot_longer(cols = everything(), names_to = "model", values_to = "r")

  # Record which score the correlations were computed against
  correlation_df$PGS_ID <- pgs_list
  correlation_df
}

# Apply the function to each PGS ID and combine results into a single data frame.
# The table is passed explicitly so the result cannot depend on which cohort's
# cell last redefined calculate_correlations().
all_correlations_df <- do.call(
  rbind,
  lapply(pgs_include, calculate_correlations, score_df = all_correlations)
) %>%
  rename(model_1 = model, model_2 = PGS_ID)
head(all_correlations_df)
write.table(all_correlations_df,
            'AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.score_level_percentile.pairwise_correlations.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)
length(unique(all_correlations_df$model_1))
length(unique(all_correlations_df$model_2))

### eur

In [None]:
# calculate R for all pairs
# Keep only the percentile columns and strip the "ntile_" prefix so that the
# column names are the bare score IDs.
all_correlations_eur <- df_ntile_norm_wide_eur %>%
  select(starts_with("ntile_")) %>%
  rename_with(~ gsub("ntile_", "", .), .cols = everything())

# Correlate one score's percentile column with every percentile column.
#
# pgs_list: a single score ID; must be a column name of `score_df`.
# score_df: table of percentile columns, one per score. Defaults to
#           `all_correlations_eur`, so one-argument calls keep working.
# Returns a long data frame with one row per column of `score_df`:
#   model  - column name, r - correlation (Pearson, the cor() default),
#   PGS_ID - the score passed in as `pgs_list`.
calculate_correlations <- function(pgs_list, score_df = all_correlations_eur) {
  pgs_list <- as.character(pgs_list)
  stopifnot(length(pgs_list) == 1L)
  if (!pgs_list %in% names(score_df)) {
    stop("No percentile column named '", pgs_list, "' in score_df", call. = FALSE)
  }

  # `[[` extracts the column from a data.table, tibble or data.frame alike;
  # the data.table-only `..name` prefix fails on anything not read by fread().
  # The result is a 1 x ncol(score_df) matrix named by the columns of score_df.
  correlation_coefficients <- cor(score_df[[pgs_list]], score_df)
  correlation_df <- as.data.frame(correlation_coefficients) %>%
    pivot_longer(cols = everything(), names_to = "model", values_to = "r")

  # Record which score the correlations were computed against
  correlation_df$PGS_ID <- pgs_list
  correlation_df
}

# Apply the function to each PGS ID and combine results into a single data frame.
# The table is passed explicitly so the result cannot depend on which cohort's
# cell last redefined calculate_correlations().
all_correlations_df_eur <- do.call(
  rbind,
  lapply(pgs_include, calculate_correlations, score_df = all_correlations_eur)
) %>%
  rename(model_1 = model, model_2 = PGS_ID)
head(all_correlations_df_eur)
write.table(all_correlations_df_eur,
            'AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.score_level_percentile.pairwise_correlations.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)
length(unique(all_correlations_df_eur$model_1))
length(unique(all_correlations_df_eur$model_2))

### afr

In [None]:
# calculate R for all pairs
# Keep only the percentile columns and strip the "ntile_" prefix so that the
# column names are the bare score IDs.
all_correlations_afr <- df_ntile_norm_wide_afr %>%
  select(starts_with("ntile_")) %>%
  rename_with(~ gsub("ntile_", "", .), .cols = everything())

# Correlate one score's percentile column with every percentile column.
#
# pgs_list: a single score ID; must be a column name of `score_df`.
# score_df: table of percentile columns, one per score. Defaults to
#           `all_correlations_afr`, so one-argument calls keep working.
# Returns a long data frame with one row per column of `score_df`:
#   model  - column name, r - correlation (Pearson, the cor() default),
#   PGS_ID - the score passed in as `pgs_list`.
calculate_correlations <- function(pgs_list, score_df = all_correlations_afr) {
  pgs_list <- as.character(pgs_list)
  stopifnot(length(pgs_list) == 1L)
  if (!pgs_list %in% names(score_df)) {
    stop("No percentile column named '", pgs_list, "' in score_df", call. = FALSE)
  }

  # `[[` extracts the column from a data.table, tibble or data.frame alike;
  # the data.table-only `..name` prefix fails on anything not read by fread().
  # The result is a 1 x ncol(score_df) matrix named by the columns of score_df.
  correlation_coefficients <- cor(score_df[[pgs_list]], score_df)
  correlation_df <- as.data.frame(correlation_coefficients) %>%
    pivot_longer(cols = everything(), names_to = "model", values_to = "r")

  # Record which score the correlations were computed against
  correlation_df$PGS_ID <- pgs_list
  correlation_df
}

# Apply the function to each PGS ID and combine results into a single data frame.
# The table is passed explicitly so the result cannot depend on which cohort's
# cell last redefined calculate_correlations().
all_correlations_df_afr <- do.call(
  rbind,
  lapply(pgs_include, calculate_correlations, score_df = all_correlations_afr)
) %>%
  rename(model_1 = model, model_2 = PGS_ID)
head(all_correlations_df_afr)
write.table(all_correlations_df_afr,
            'AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.score_level_percentile.pairwise_correlations.txt',
            sep = '\t',
            col.names = TRUE,
            row.names = FALSE,
            quote = FALSE)
length(unique(all_correlations_df_afr$model_1))
length(unique(all_correlations_df_afr$model_2))

## create cohort order

In [None]:
# Score IDs appended after the training-set lists when building the cohort
# order: three Phe_585.3 entries followed by four eGFR.flip entries.
meta_phe <- c(
  "AFR.Phe_585.3.PRScsx",
  "EAS.Phe_585.3.PRScsx",
  "EUR.Phe_585.3.PRScsx",
  "AFR.eGFR.flip.PRScsx",
  "AMR.eGFR.flip.PRScsx",
  "EAS.eGFR.flip.PRScsx",
  "EUR.eGFR.flip.PRScsx"
)

In [None]:
# Display order of the scores: the first column of each training-set list in
# turn (CKDGen EUR, CKDGen multi, UKBB), followed by the meta_phe IDs.
PGS_cohort_order <- c(
  ckdgen_eur$V1,
  ckdgen_multi$V1,
  ukbb$V1,
  meta_phe
)
length(PGS_cohort_order)
PGS_cohort_order

## make heatmap

### all

In [None]:
# heat map
# Triangular heat map of the pairwise percentile correlations (ALL cohort),
# with scores laid out in PGS_cohort_order on both axes.
corr_heatmap <- all_correlations_df %>%
  # Keep each unordered pair once, plus the diagonal. A model missing from
  # PGS_cohort_order has an NA rank and is dropped by filter().
  filter(
    as.numeric(factor(model_1, levels = PGS_cohort_order)) <=
      as.numeric(factor(model_2, levels = PGS_cohort_order))
  ) %>%
  # Sort by model_1, ties by model_2, so fct_inorder() below yields the
  # cohort order on both axes.
  arrange(
    factor(model_1, levels = PGS_cohort_order),
    factor(model_2, levels = PGS_cohort_order)
  ) %>%
  ggplot(aes(x = fct_inorder(model_2), y = fct_inorder(model_1), fill = r)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "skyblue", mid = "white", high = "orange", limits = c(-1, 1), name = "*r*") +
  guides(fill = guide_colourbar(order = 1)) +
  labs(x = NULL,
       y = NULL,
       title = ("AOU ALL CKD PGS Percentile Correlation")) +
  coord_equal() +
  theme_bw(base_size = 16) +
  theme(panel.grid.major = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1),
        # Render the "*r*" legend title as markdown (italic r).
        legend.title = ggtext::element_markdown(),
        plot.title.position = "plot")

corr_heatmap
# Name the plot explicitly so the saved file does not depend on whichever
# plot ggplot2 last recorded.
ggsave('AOU.ALL.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_correlation.heatmap.png',
       plot = corr_heatmap,
       width = 10,
       height = 8,
       dpi = 300)

### eur

In [None]:
# heat map
# Triangular heat map of the pairwise percentile correlations (EUR cohort),
# with scores laid out in PGS_cohort_order on both axes.
corr_heatmap_eur <- all_correlations_df_eur %>%
  # Keep each unordered pair once, plus the diagonal. A model missing from
  # PGS_cohort_order has an NA rank and is dropped by filter().
  filter(
    as.numeric(factor(model_1, levels = PGS_cohort_order)) <=
      as.numeric(factor(model_2, levels = PGS_cohort_order))
  ) %>%
  # Sort by model_1, ties by model_2, so fct_inorder() below yields the
  # cohort order on both axes.
  arrange(
    factor(model_1, levels = PGS_cohort_order),
    factor(model_2, levels = PGS_cohort_order)
  ) %>%
  ggplot(aes(x = fct_inorder(model_2), y = fct_inorder(model_1), fill = r)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "skyblue", mid = "white", high = "orange", limits = c(-1, 1), name = "*r*") +
  guides(fill = guide_colourbar(order = 1)) +
  labs(x = NULL,
       y = NULL,
       title = ("AOU EUR CKD PGS Percentile Correlation")) +
  coord_equal() +
  theme_bw(base_size = 16) +
  theme(panel.grid.major = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1),
        # Render the "*r*" legend title as markdown (italic r).
        legend.title = ggtext::element_markdown(),
        plot.title.position = "plot")

corr_heatmap_eur
# Name the plot explicitly so the saved file does not depend on whichever
# plot ggplot2 last recorded.
ggsave('AOU.EUR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_correlation.heatmap.png',
       plot = corr_heatmap_eur,
       width = 10,
       height = 8,
       dpi = 300)

### afr

In [None]:
# heat map
# Triangular heat map of the pairwise percentile correlations (AFR cohort),
# with scores laid out in PGS_cohort_order on both axes.
corr_heatmap_afr <- all_correlations_df_afr %>%
  # Keep each unordered pair once, plus the diagonal. A model missing from
  # PGS_cohort_order has an NA rank and is dropped by filter().
  filter(
    as.numeric(factor(model_1, levels = PGS_cohort_order)) <=
      as.numeric(factor(model_2, levels = PGS_cohort_order))
  ) %>%
  # Sort by model_1, ties by model_2, so fct_inorder() below yields the
  # cohort order on both axes.
  arrange(
    factor(model_1, levels = PGS_cohort_order),
    factor(model_2, levels = PGS_cohort_order)
  ) %>%
  ggplot(aes(x = fct_inorder(model_2), y = fct_inorder(model_1), fill = r)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "skyblue", mid = "white", high = "orange", limits = c(-1, 1), name = "*r*") +
  guides(fill = guide_colourbar(order = 1)) +
  labs(x = NULL,
       y = NULL,
       title = ("AOU AFR CKD PGS Percentile Correlation")) +
  coord_equal() +
  theme_bw(base_size = 16) +
  theme(panel.grid.major = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1),
        # Render the "*r*" legend title as markdown (italic r).
        legend.title = ggtext::element_markdown(),
        plot.title.position = "plot")

corr_heatmap_afr
# Name the plot explicitly so the saved file does not depend on whichever
# plot ggplot2 last recorded.
ggsave('AOU.AFR.CKD.YK_Phenotyping.PGS_Znorm2.individual_percentile_correlation.heatmap.png',
       plot = corr_heatmap_afr,
       width = 10,
       height = 8,
       dpi = 300)

## get correlation summary data

### all

In [None]:
# Column summaries of the ALL-cohort correlations with r below 0.99.
# NOTE(review): presumably the cut-off is meant to drop self-correlations; it
# also removes any distinct pair with r >= 0.99, and each pair appears twice
# (once per direction) - confirm this is intended.
summary(
  filter(all_correlations_df, r < 0.99)
)

### eur

In [None]:
# Column summaries of the EUR-cohort correlations with r below 0.99.
# NOTE(review): presumably the cut-off is meant to drop self-correlations; it
# also removes any distinct pair with r >= 0.99, and each pair appears twice
# (once per direction) - confirm this is intended.
summary(
  filter(all_correlations_df_eur, r < 0.99)
)

### afr

In [None]:
# Column summaries of the AFR-cohort correlations with r below 0.99.
# NOTE(review): presumably the cut-off is meant to drop self-correlations; it
# also removes any distinct pair with r >= 0.99, and each pair appears twice
# (once per direction) - confirm this is intended.
summary(
  filter(all_correlations_df_afr, r < 0.99)
)