# Find the top 20 ICD-code phenotypes

In [1]:
library(tidyverse)

── Attaching core tidyverse packages ───────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


In [2]:
# Load the phenotype table, keeping only the two sample-identifier columns
# and the columns whose names start with "b_".
pheno_df <- vroom::vroom(
    "../../data/pheno/pheno_jan2024.tsv",
    delim = "\t",
    col_select = c("#FID", "IID", starts_with("b_")),
    # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
    show_col_types = FALSE
) %>%
    # Drop the leading "#" from the header so the column can be used unquoted.
    rename(FID = `#FID`)

In [3]:
# Score every phenotype column as the sum of (value - 2) over all rows, keep
# the 20 highest-scoring columns, and return their names sorted alphabetically.
# NOTE(review): the "- 2" offset presumably assumes a 1/2 (control/case)
# coding, in which case the score equals minus the number of controls --
# confirm against the phenotype file. sum() is called without na.rm, so a
# column containing any NA gets an NA score and is placed last by arrange().
top_20_phenos <- pheno_df %>%
    # Reshape to one row per (sample, phenotype); the ID columns are carried
    # along but play no part in the per-phenotype aggregation below.
    pivot_longer(
        cols = -c(FID, IID),
        names_to = "phenotype",
        values_to = "status"
    ) %>%
    group_by(phenotype) %>%
    summarise(score = sum(status - 2)) %>%
    arrange(desc(score)) %>%
    slice_head(n = 20) %>%
    pull(phenotype) %>%
    as.character() %>%
    sort()

In [None]:
# Write the sample identifiers plus the phenotype columns named in
# `top_20_phenos` to a tab-separated file.
# NOTE(review): this output path is relative to the working directory, while
# the input table was read from "../../data/pheno/" -- confirm that
# "data/pheno/" is the intended destination.
write_tsv(
    select(pheno_df, FID, IID, all_of(top_20_phenos)),
    file = "data/pheno/top20.tsv"
)