In [1]:
suppressWarnings(suppressPackageStartupMessages({
    library(tidyverse)
    library(data.table)
}))


In [2]:
# Evaluate the shared parameter file as R code in the current environment.
# NOTE(review): the file carries a .sh extension but is parsed by R here, so it
# presumably contains only plain assignments valid in both shell and R — confirm.
# Presumably this is where RData_d, results_sub_d and rdata_base (used below)
# are defined; they are not assigned anywhere else in this script.
source('parameters.sh')


In [3]:
# Read one score table, `<RData_d>/<results_sub_d>/<rdata_base>.<score_name>.tsv`,
# and tidy its identifiers:
#   * column names that start with '#' lose that '#',
#   * in `rowname`, the first '_adjstatins' match is removed, then the first
#     '_adjstatin' match,
#   * in `component`, a leading 'Component' is removed.
# The cleaning is the same for both tables, so it lives in one helper.
read_score_table <- function(score_name) {
  tsv_path <- file.path(
    RData_d, results_sub_d,
    sprintf('%s.%s.tsv', rdata_base, score_name)
  )
  strip_hash <- function(x) {
    str_replace(x, '#', '')
  }

  fread(tsv_path) %>%
    rename_with(strip_hash, starts_with("#")) %>%
    mutate(
      # order matters: the longer '_adjstatins' pattern must be tried first
      rowname = str_replace(
        str_replace(rowname, '_adjstatins', ''),
        '_adjstatin', ''
      )
    ) %>%
    mutate(component = str_replace(component, '^Component', ''))
}

# contribution scores (column `contribution_score` is used further down)
ct_df <- read_score_table('score_contribution')

# squared cosine scores (column `squared_cosine_score` is used further down)
sq_df <- read_score_table('score_squared_cosine')



In [4]:
# Get the list of traits.
#
# `traits` holds the distinct values of `sq_df$rowname`, in the order produced
# by `arrange(rowname)`. `traits_df` is a lookup table with one row per trait:
#   trait_order - 1-based position of the trait in `traits`
#   trait       - trait identifier as it appears in `rowname`
#   trait_label - identifier with every '_' replaced by a space

traits <- sq_df %>%
  arrange(rowname) %>%
  pull(rowname) %>%
  unique()

# seq_along() rather than 1:length(): with zero traits, 1:length(traits) would
# be c(1, 0) and data.frame() would fail on mismatched column lengths.
traits_df <- data.frame(
  trait_order = seq_along(traits),
  trait = traits
) %>%
  mutate(
    # `traits` holds the same values as the `trait` column
    trait_label = str_replace_all(traits, '_', ' ')
  )


In [5]:
# Component lookup table.
#
# Uses only the squared cosine scores of the 'AST_ALT_ratio' rows: each
# component gets `component_order`, its rank by descending
# squared_cosine_score (rank() defaults apply, so ties share an average rank).
# The result has columns `component` and `component_order`, sorted by the
# latter.

components_df <- sq_df %>%
  filter(rowname == 'AST_ALT_ratio') %>%
  mutate(component_order = rank(-squared_cosine_score)) %>%
  select(component, component_order) %>%
  arrange(component_order)


In [6]:
# Assemble the plot table: one row per (component, trait) pair of `ct_df`.
#
#   scaled_ct_score - the row's contribution score multiplied by the
#                     component's squared cosine score for 'AST_ALT_ratio'
#   cumsum_scaled   - within a component, the sum of scaled_ct_score over all
#                     rows with a larger score (running total minus the row's
#                     own value)
full_df <- ct_df %>%
  # attach the AST_ALT_ratio squared cosine score of each component
  left_join(
    sq_df %>%
      filter(rowname == 'AST_ALT_ratio') %>%
      select(-rowname),
    by = "component"
  ) %>%
  rename(trait = rowname) %>%
  # scaled contribution score
  mutate(scaled_ct_score = contribution_score * squared_cosine_score) %>%
  select(-contribution_score, -squared_cosine_score) %>%
  # bring in trait_order / trait_label and component_order
  left_join(traits_df, by = "trait") %>%
  left_join(components_df, by = "component") %>%
  group_by(component_order) %>%
  # arrange() ignores the grouping, so this is a global descending sort; the
  # rows of each component therefore also end up in descending order
  arrange(desc(scaled_ct_score)) %>%
  mutate(cumsum_scaled = cumsum(scaled_ct_score) - scaled_ct_score) %>%
  ungroup() %>%
  arrange(component_order, trait_order) %>%
  select(
    component_order, component,
    trait_order, trait, trait_label,
    scaled_ct_score, cumsum_scaled
  )


In [7]:
# Write the plot table to 'AST_ALT_ratio_plot_data.tsv' (path relative to the
# working directory) as tab-separated text: missing values are written as
# "NA" and fields are not quoted.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
full_df %>%
  fwrite('AST_ALT_ratio_plot_data.tsv', sep = '\t', na = "NA", quote = FALSE)
