# 4. Scores

In [None]:
try(library(tidyverse), silent=TRUE)
library(lubridate)
library(data.table)
library(glue)
library(jsonlite)
# Dataset build identifier and the shared data root it lives under.
dataset_name <- "210616_centres_dask"
data_path <- "/data/analysis/ag-reils/ag-reils-shared/cardioRS/data"

# Directories of this build under `3_datasets_post` and `2_datasets_pre`.
dataset_path <- glue("{data_path}/3_datasets_post/{dataset_name}")
dataset_path_pre <- glue("{data_path}/2_datasets_pre/{dataset_name}")

In [None]:
# Project-specific output locations: everything hangs off `project_path`.
project_label <- "21_PGS_Revision"
project_path <- glue("/data/analysis/ag-reils/ag-reils-shared/cardioRS/results/projects/{project_label}")

figures_path <- glue("{project_path}/figures")
data_results_path <- glue("{project_path}/data")

In [None]:
# Covariate description table of the dataset (used below for its `covariate` and `mapping` columns).
description <- glue("{dataset_path}/description.feather") %>%
  arrow::read_feather()

In [None]:
# Paths of the imputed test split for partitions 0..21, built in one vectorised
# glue() call; as.character() drops the glue class so `files` is a plain character vector.
files <- as.character(glue("{dataset_path}/partition_{0:21}/test/data_imputed.feather"))

In [None]:
# Read every partition file and stack the resulting tables row-wise.
data <- files %>%
  map(arrow::read_feather) %>%
  bind_rows()

## Missing values

In [None]:
# Number of missing values per column of `data` (row names are the column names).
# Computed once with a type-stable vapply() and stored under a readable column
# name, instead of scanning the whole dataset a second time for the filter.
na_count <- data.frame(n_missing = vapply(data, function(y) sum(is.na(y)), integer(1)))
# Show only the columns that actually contain missing values.
na_count %>% filter(n_missing > 0)

In [None]:
# Decode the `mapping` string stored in `description` for one covariate.
# The string is made JSON-parseable by swapping single for double quotes and
# dropping the ", nan: -2" entry; the parsed key/value pairs are then inverted,
# so the result is a character vector of the keys, named by their values.
decode_mapping <- function(covariate_name) {
  raw_mapping <- (description %>% filter(covariate == !!covariate_name))$mapping
  json_text <- str_replace_all(str_replace_all(raw_mapping, "'", '"'), ", nan: -2", "")
  parsed <- jsonlite::fromJSON(json_text)
  setNames(names(parsed), parsed)
}

map_smoking <- decode_mapping("smoking_status")
map_gender <- decode_mapping("sex")
map_ethnicity <- decode_mapping("ethnic_background")

In [None]:
# Replace the encoded categorical values with their labels using the lookup vectors.
data <- data %>%
  mutate(
    smoking_status = recode(smoking_status, !!!map_smoking),
    sex = recode(sex, !!!map_gender),
    ethnic_background = recode(ethnic_background, !!!map_ethnicity)
  )

# Scores

## ESC SCORE (Conroy 2003)

[Conroy 2003](http://eurheartj.oxfordjournals.org/content/24/11/987.full.pdf)

In [None]:
# 10-year risk for one endpoint of the SCORE model.
#
# age, cholesterol, SBP, currentSmoker : risk-factor values; cholesterol is
#     centred at 6 and SBP at 120 in the linear predictor.
# betaSmoker, betaSBP, betaChol        : risk-factor coefficients for the endpoint.
# coefs                                : named vector with elements "alpha" and "p"
#                                        that parameterise the baseline survival.
# Returns 1 - S(age + 10) / S(age), the risk over the next ten years.
calculateRisk <- function(age, cholesterol, SBP, currentSmoker, betaSmoker, betaSBP, betaChol, coefs) {
  # Baseline survival, evaluated on an already shifted age (age - 20 or age - 10).
  baseline_survival <- function(shifted_age) {
    exp(-exp(coefs["alpha"]) * shifted_age^coefs["p"])
  }
  surv_now <- baseline_survival(age - 20)
  surv_in_10y <- baseline_survival(age - 10)

  # Linear predictor of the risk factors; exp() of it scales the baseline hazard.
  linear_predictor <- betaChol * (cholesterol - 6) + betaSBP * (SBP - 120) + betaSmoker * currentSmoker
  hazard_ratio <- exp(linear_predictor)

  # Conditional 10-year survival from the two risk-adjusted survival values.
  survival_10y <- (surv_in_10y^hazard_ratio) / (surv_now^hazard_ratio)
  1 - survival_10y
}

# SCORE 10-year risk: the sum of the CHD and the non-CHD endpoint risks.
#
# age, cholesterol, SBP, currentSmoker : risk-factor values, forwarded to calculateRisk().
# gender : "Male" or "Female"; "Men"/"Women" are accepted as aliases so that the
#          default value is usable (the coefficient array is keyed by "Male"/"Female").
# risk   : "Low risk" or "High risk", selecting the coefficient set.
# `gender` and `risk` must be single values; anything else stops with an error.
calculateScoreEur <- function(age, cholesterol, SBP, currentSmoker, gender = "Men", risk = "Low risk") {
  # Risk-factor coefficients: element 1 is used with the "CHD" baseline,
  # element 2 with the "non CHD" baseline.
  betaSmoker <- c(0.71, 0.63)
  betaSBP    <- c(0.018, 0.022)
  betaChol   <- c(0.24, 0.02)

  # Baseline parameters, indexed as [parameter, endpoint, gender, risk region].
  coeffs <- array(c(-22.1, 4.71, -26.7, 5.64, -29.8, 6.36, -31.0, 6.62, -21.0, 4.62, -25.7, 5.47, -28.7, 6.23, -30.0, 6.42),
                  c(2,2,2,2),
                  dimnames = list(c("alpha", "p"), c("CHD", "non CHD"), c("Male", "Female"), c("Low risk", "High risk")))

  # Normalise the labels. as.character() also protects against factors, which
  # would otherwise index the array by integer code instead of by label.
  gender_labels <- c(Male = "Male", Female = "Female", Men = "Male", Women = "Female")
  gender <- as.character(gender)
  risk <- as.character(risk)
  if (length(gender) != 1 || !(gender %in% names(gender_labels))) {
    stop("`gender` must be one of: ", paste(names(gender_labels), collapse = ", "), call. = FALSE)
  }
  if (length(risk) != 1 || !(risk %in% dimnames(coeffs)[[4]])) {
    stop("`risk` must be one of: ", paste(dimnames(coeffs)[[4]], collapse = ", "), call. = FALSE)
  }
  gender <- gender_labels[[gender]]

  # One risk per endpoint, each with its own coefficients and baseline.
  chd_risk <- calculateRisk(age, cholesterol, SBP, currentSmoker,
                            betaSmoker[1], betaSBP[1], betaChol[1], coeffs[, "CHD", gender, risk])
  non_chd_risk <- calculateRisk(age, cholesterol, SBP, currentSmoker,
                                betaSmoker[2], betaSBP[2], betaChol[2], coeffs[, "non CHD", gender, risk])

  chd_risk + non_chd_risk
}

In [None]:
# Example inputs for a single manual call of calculateScoreEur().
sex <- "Male"
age_at_recruitment <- 64
cholesterol <- 6.8
systolic_blood_pressure <- 140
current_smoker <- 0

In [None]:
# Manual check of the low-risk SCORE model on the example inputs defined above.
calculateScoreEur(
  age = age_at_recruitment,
  cholesterol = cholesterol,
  SBP = systolic_blood_pressure,
  currentSmoker = current_smoker,
  gender = sex,
  risk = "Low risk"
)

In [None]:
# Inputs of the SCORE model per participant; `current_smoker` is 1 when
# smoking_status is "Current" and 0 in every other case.
temp <- data %>%
  select(eid, age_at_recruitment, cholesterol, systolic_blood_pressure, smoking_status, sex) %>%
  mutate(current_smoker = case_when(smoking_status == "Current" ~ 1, TRUE ~ 0)) %>%
  data.table()
# Evaluate the low-risk SCORE model one eid at a time; `:=` adds the column to `temp` by reference.
temp[, SCORE_SCORE_Ft_10 := calculateScoreEur(age_at_recruitment, cholesterol, systolic_blood_pressure, current_smoker, sex, risk = "Low risk"), by = "eid"]
SCORE_df <- temp %>% select(c(eid, SCORE_SCORE_Ft_10))
SCORE_df

## ACC/AHA ASCVD (Goff 2014)

[Goff 2014](https://www.ahajournals.org/doi/pdf/10.1161/01.cir.0000437741.48606.98)

In [None]:
# Coefficient table as whitespace-separated text: the first line holds the 17
# column names, each following line a quoted row label followed by 17 values.
# It is parsed into a data frame with read.table() further down.
coefs_string = '"ln_age" "ln_age_squared" "ln_total_cholest" "ln_age_totcholest" "ln_hdlC" "ln_age_hdlC" "ln_treated_BP" "ln_age_BP" "ln_untreated_BP" "ln_age_ln_untreated_BP" "smoker" "nonsmoker" "ln_age_smoker" "diabetes" "nondiabetes" "meancoef" "baseline"
                "white_female" -29.799 4.884 13.54 -3.114 -13.578 3.149 2.019 0 1.957 0 7.574 0 -1.665 0.661 0 -29.18 0.9665
                "afroamer_female" 17.114 0 0.94 0 -18.92 4.475 29.291 -6.432 27.82 -6.087 0.691 0 0 0.874 0 86.61 0.9533
                "white_male" 12.344 0 11.853 -2.664 -7.99 1.769 1.797 0 1.764 0 7.837 0 -1.795 0.658 0 61.18 0.9144
                "afroamer_male" 2.469 0 0.302 0 -0.307 0 1.916 0 1.809 0 0.549 0 0 0.645 0 19.54 0.8954'

In [None]:
# Coefficients from Appendix 7, parsed from `coefs_string`: the first field of
# each data line becomes the row name (one row per sex/ethnicity group).
coefs <- read.table(
  text = coefs_string,
  row.names = 1
)
# coefs

# Built-in coefficient table, used when calculateASCVD() is called without `coefs`.
# Rows, in order: white_female, afroamer_female, white_male, afroamer_male.
.ascvd_default_coefs <- function() {
  term_names <- c("ln_age", "ln_age_squared", "ln_total_cholest", "ln_age_totcholest",
                  "ln_hdlC", "ln_age_hdlC", "ln_treated_BP", "ln_age_BP",
                  "ln_untreated_BP", "ln_age_ln_untreated_BP", "smoker", "nonsmoker",
                  "ln_age_smoker", "diabetes", "nondiabetes", "meancoef", "baseline")
  values <- rbind(
    white_female    = c(-29.799, 4.884, 13.54, -3.114, -13.578, 3.149, 2.019, 0, 1.957, 0, 7.574, 0, -1.665, 0.661, 0, -29.18, 0.9665),
    afroamer_female = c(17.114, 0, 0.94, 0, -18.92, 4.475, 29.291, -6.432, 27.82, -6.087, 0.691, 0, 0, 0.874, 0, 86.61, 0.9533),
    white_male      = c(12.344, 0, 11.853, -2.664, -7.99, 1.769, 1.797, 0, 1.764, 0, 7.837, 0, -1.795, 0.658, 0, 61.18, 0.9144),
    afroamer_male   = c(2.469, 0, 0.302, 0, -0.307, 0, 1.916, 0, 1.809, 0, 0.549, 0, 0, 0.645, 0, 19.54, 0.8954)
  )
  colnames(values) <- term_names
  as.data.frame(values)
}

# 10-year ASCVD risk for ONE individual (all arguments are single values).
#
# coefs      : coefficient data frame with the columns listed in .ascvd_default_coefs()
#              and rows ordered female/non-Black, female/Black, male/non-Black,
#              male/Black (rows are picked by position). NULL uses the built-in table.
#              The former default `coefs=coefs` referred to itself and raised
#              "promise already under evaluation" whenever the argument was omitted.
# sex        : "Male" or "Female".
# ethnicity  : "Black" selects the second/fourth row; any other non-missing value
#              selects the first/third row.
# cholesterol, hdl_cholesterol : multiplied by 38.67 before taking logs.
# antihypertensives, diabetes, smoking : flags, compared against TRUE.
#
# Returns the risk as a number, or NA_real_ when `sex` or `ethnicity` is missing.
# NA_real_ (not logical NA) keeps the type stable when the function fills a
# column group by group. An unrecognised `sex` stops with an error.
calculateASCVD <- function(coefs=NULL, sex="Male", ethnicity="White", age=53, cholesterol=2, hdl_cholesterol=1.1, systolic_blood_pressure=120, antihypertensives=1, diabetes=0, smoking=1) {
    if (is.null(coefs)) {
        coefs <- .ascvd_default_coefs()
    }
    if (is.na(sex) || is.na(ethnicity)) {
        return(NA_real_)
    }

    # Pick the coefficient row for this sex / ethnicity group.
    is_black <- ethnicity == "Black"
    group_row <- switch(as.character(sex),
                        Female = if (is_black) 2 else 1,
                        Male   = if (is_black) 4 else 3,
                        stop("`sex` must be \"Male\" or \"Female\"", call. = FALSE))
    const <- coefs[group_row, ]

    # Smoking: main effect plus an age interaction that only applies to smokers.
    if (smoking == TRUE) {
        smoking_main <- const$smoker
        smoker_flag <- 1
    } else {
        smoking_main <- const$nonsmoker
        smoker_flag <- 0
    }

    # Blood pressure: treated and untreated SBP have separate coefficients.
    if (antihypertensives == TRUE) {
        sbp_coef <- const$ln_treated_BP
        age_sbp_coef <- const$ln_age_BP
    } else {
        sbp_coef <- const$ln_untreated_BP
        age_sbp_coef <- const$ln_age_ln_untreated_BP
    }

    diabetes_term <- if (diabetes == TRUE) const$diabetes else const$nondiabetes

    log_age <- log(age)
    log_total_chol <- log(cholesterol * 38.67)
    log_hdl <- log(hdl_cholesterol * 38.67)
    log_sbp <- log(systolic_blood_pressure)

    # Individual sum of coefficient * value over all model terms.
    calc <- log_age * const$ln_age + log_age * log_age * const$ln_age_squared +
        log_total_chol * const$ln_total_cholest +
        log_age * log_total_chol * const$ln_age_totcholest +
        log_hdl * const$ln_hdlC +
        log_age * log_hdl * const$ln_age_hdlC +
        smoking_main + smoker_flag * log_age * const$ln_age_smoker +
        log_sbp * sbp_coef +
        log_age * log_sbp * age_sbp_coef + diabetes_term

    # Risk = 1 - baseline survival ^ exp(individual sum - group mean sum).
    1 - const$baseline^exp(calc - const$meancoef)
}

In [None]:
# Inputs of the ASCVD model per participant; `current_smoker` is TRUE only when
# smoking_status is "Current".
temp <- data %>%
  select(eid, age_at_recruitment, ethnic_background, sex, cholesterol, hdl_cholesterol, systolic_blood_pressure, antihypertensives, diabetes2, smoking_status) %>%
  mutate(current_smoker = case_when(smoking_status == "Current" ~ TRUE, TRUE ~ FALSE)) %>%
  data.table()
# Evaluate the model one eid at a time; `:=` adds the column to `temp` by reference.
temp[, SCORE_ASCVD_Ft_10 := calculateASCVD(coefs, sex, ethnic_background, age_at_recruitment, cholesterol, hdl_cholesterol, systolic_blood_pressure, antihypertensives, diabetes2, current_smoker), by = eid]
ASCVD_df <- temp %>% select(c(eid, SCORE_ASCVD_Ft_10))
head(ASCVD_df)

## UK QRISK3 (Hippisley-Cox 2017)

[Hippisley-Cox 2017](https://www.bmj.com/content/357/bmj.j2099)

In [None]:
# Warnings are switched off globally from here on; the cell that calls
# QRISK3_2017() sets `warn` back to 0.
options(warn=-1)
library(QRISK3)
# Build the input frame for QRISK3_2017(): logical columns become 0/1 integers
# and the derived/encoded columns the function is pointed at are added.
# (The former `temp = head(data)` was overwritten immediately and has been removed.)
temp = as.data.frame(data %>%
                     mutate_if(is.logical, as.integer) %>%
                     mutate(cholesterol_HDL_ratio=cholesterol/hdl_cholesterol,
                            gender=case_when(sex=="Female"~1, sex=="Male"~0),
                            # NOTE(review): "Black" and "Mixed" are coded like "White" (1) and
                            # "Chinese" like "Asian" (5) -- confirm against the ethnicity coding
                            # documented for the QRISK3 package that this is intended.
                            ethnicity=case_when(ethnic_background == "White" ~ 1,
                                                ethnic_background == "Asian" ~ 5,
                                                ethnic_background == "Black" ~ 1,
                                                ethnic_background == "Chinese" ~ 5,
                                                ethnic_background == "Mixed" ~ 1,
                                                TRUE ~ 1
                                               ),
                            # No default branch: any other smoking_status becomes NA.
                            smoke=case_when(smoking_status == "Never" ~ 1,
                                            smoking_status == "Previous" ~ 2,
                                            smoking_status == "Current" ~ 4
                                            ),
                            age = as.numeric(age_at_recruitment),
                            # The same constant is used for every participant -- presumably because
                            # no per-participant value is available (the QRISK3_2017() call marks
                            # this input as "MISSING"); TODO confirm the origin of the number.
                            std_systolic_blood_pressure = 9.002537727355957
                           ) #%>% drop_na()
                    )

In [None]:
# Re-enable normal warning handling, run QRISK3 on the prepared frame and keep
# only the participant id and the score divided by 100.
# Each argument names the column of `temp` that holds the corresponding input;
# the argument spellings (e.g. `ethiniciy`, `erectile_disfunction`) are the ones
# used in this call and must not be "corrected".
options(warn = 0)
QRISK3_df <- QRISK3_2017(
  data = temp,
  patid = "eid",
  gender = "gender",
  age = "age",
  atrial_fibrillation = "atrial_fibrillation",
  atypical_antipsy = "atypical_antipsychotics",
  regular_steroid_tablets = "glucocorticoids",
  erectile_disfunction = "erectile_dysfunction",
  migraine = "migraine",
  rheumatoid_arthritis = "rheumatoid_arthritis",
  chronic_kidney_disease = "chronic_kidney_disease",
  severe_mental_illness = "severe_mental_illness",
  systemic_lupus_erythematosis = "systemic_lupus_erythematosus",
  blood_pressure_treatment = "antihypertensives",
  diabetes1 = "diabetes1",
  diabetes2 = "diabetes2",
  weight = "weight",
  height = "standing_height",
  ethiniciy = "ethnicity",
  heart_attack_relative = "fh_heart_disease",
  cholesterol_HDL_ratio = "cholesterol_HDL_ratio",
  systolic_blood_pressure = "systolic_blood_pressure",
  std_systolic_blood_pressure = "std_systolic_blood_pressure", ### MISSING!
  smoke = "smoke",
  townsend = "townsend_deprivation_index_at_recruitment"
) %>%
  mutate(SCORE_QRISK3_Ft_10 = QRISK3_2017 / 100) %>%
  select(c(eid, SCORE_QRISK3_Ft_10))

In [None]:
# Number of rows in the QRISK3 result.
dim(QRISK3_df)[1]

# JOIN SCORE OUTPUTS

In [None]:
# Join all score tables on `eid`. `FRS_df` is not created anywhere in this
# notebook, so it is only included (as the left-most table, as before) when it
# already exists in the session; otherwise the join starts from SCORE_df
# instead of failing with "object 'FRS_df' not found".
score_frames <- list(SCORE_df, ASCVD_df, QRISK3_df)
if (exists("FRS_df")) {
  score_frames <- c(list(FRS_df), score_frames)
}
score_df <- reduce(score_frames, left_join, by = "eid") %>% arrange(eid)
head(score_df)

In [None]:
# Long format: one row per participant and score. The score name is the part of
# the column name between "SCORE_" and "_Ft_10"; every row is tagged with
# module "SCORE" and calibrated = FALSE.
score_df_long <- score_df %>%
  mutate(module = "SCORE") %>%
  pivot_longer(
    -c(eid, module),
    names_to = "features",
    values_to = "Ft_10",
    names_pattern = "SCORE_(.*)_Ft_10"
  ) %>%
  mutate(
    features = str_remove(features, "score_"),
    calibrated = FALSE
  )
score_df_long

In [None]:
# Persist the long-format score predictions in the project's data directory.
write_csv(score_df_long, glue("{data_results_path}/predictions_scores_210616.csv"))