In [1]:
library(tidyverse)
library(corrr)

── [1mAttaching core tidyverse packages[22m ───────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.0     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ─────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors


# Real data

## Phenotypic correlation

In [2]:
# Pairwise Pearson correlations between all columns of the top-20 phenotype
# table that remain after dropping the FID and IID identifier columns.
# Missing values are handled per pair of columns ("pairwise.complete.obs"),
# so each coefficient uses every row where both columns are observed.
rp_df <- read_tsv("data/pheno/top20.tsv", show_col_types = FALSE) %>%
    select(-FID, -IID) %>%
    correlate(method = "pearson", use = "pairwise.complete.obs") %>%
    # Reshape the wide correlation data frame into long format
    # (one row per pair of columns).
    stretch()

# Persist the long-format table for plotting.
rp_df %>% write_tsv("plot_data/phenotypic_correlation.tsv")

Correlation computed with
[36m•[39m Method: 'pearson'
[36m•[39m Missing treated using: 'pairwise.complete.obs'


## Heritability

In [3]:
# Gather heritability estimates from every *.hers file under data/h2.
# vroom reads all matched files into a single table and stores the originating
# file path of each row in `source`; the phenotype code (e.g. "b_E78") is then
# parsed out of that path.
h2_df <- fs::dir_ls("data/h2", glob = "*.hers") %>%
    vroom::vroom(
        id = "source",
        delim = " ",
        show_col_types = FALSE,
        .name_repair = "unique_quiet"
    ) %>%
    # Keep only the rows whose component is "Her_All".
    filter(Component == "Her_All") %>%
    mutate(phenotype = str_extract(source, "b_[A-Z0-9]+")) %>%
    select(phenotype, h2 = Heritability)

# Persist the table for plotting.
h2_df %>% write_tsv("plot_data/heritability.tsv")

# Preview the first rows.
h2_df %>% head(2)

phenotype,h2
<chr>,<dbl>
b_E78,0.118519
b_F32,0.040428


## Genetic correlation

In [4]:
# Gather genetic correlation estimates from every *.cors file under data/rg.
# vroom stores the originating file path of each row in `source`; the two
# phenotype codes of the pair are parsed out of that path.
rg_df <- fs::dir_ls("data/rg", glob = "*.cors") %>%
    vroom::vroom(
        id = "source",
        delim = " ",
        show_col_types = FALSE,
        .name_repair = "unique_quiet"
    ) %>%
    # Keep only the rows whose component is "Cor_All".
    filter(Component == "Cor_All") %>%
    mutate(
        # str_extract_all() is vectorised and returns one character vector of
        # matches per row, so no rowwise() grouping is needed.
        phenotypes = str_extract_all(source, "b_[A-Z0-9]+"),
        # The first and last phenotype code found in the path form the pair.
        phenotype1 = map_chr(phenotypes, first),
        phenotype2 = map_chr(phenotypes, last)
    ) %>%
    select(phenotype1, phenotype2, rg = Value)

# Persist the table for plotting.
rg_df %>% write_tsv("plot_data/genetic_correlation.tsv")

# Preview the first rows.
rg_df %>% head(2)

phenotype1,phenotype2,rg
<chr>,<chr>,<dbl>
b_E78,b_F32,0.237043
b_E78,b_I10,0.617681


# Simulation

## Phenotypic correlation

In [5]:
# Collect the phenotypic matrix (pcov/liability.tsv) of every simulation
# directory matching the h = 0.01, s = 0.5, p = 0.0 naming pattern and reshape
# it into long format: one row per (simulation, phenotype1, phenotype2).
# NOTE(review): the values are read from a "pcov" directory but stored as
# correlation `r` without any conversion -- confirm that liability.tsv already
# holds correlations rather than covariances.
sim_pcov_df <- fs::dir_ls(
    "../../1.simulation/data/simulations/",
    glob = "*sim_*_h_0.01_s_0.5_p_0.0"
) %>%
    map_chr(function(sim_dir) {
        fs::path_join(c(sim_dir, "pcov", "liability.tsv"))
    }) %>%
    # Skip simulation directories that do not contain the expected file.
    keep(fs::file_exists) %>%
    vroom::vroom(
        id = "source",
        delim = "\t",
        show_col_types = FALSE,
        .name_repair = "unique_quiet"
    ) %>%
    # The first column gets the repaired name `...1`; its values are used as
    # the row phenotype label.
    rename(phenotype1 = `...1`) %>%
    # The simulation number is the run of digits following "sim_" in the path.
    mutate(simulation = str_extract(source, "(?<=sim_)[0-9]+")) %>%
    select(-source) %>%
    pivot_longer(
        starts_with("Trait_"),
        names_to = "phenotype2",
        values_to = "r"
    )

# Persist the table for plotting.
sim_pcov_df %>% write_tsv("plot_data/sim_phenotypic_correlation.tsv")

# Preview the first rows.
sim_pcov_df %>% head(2)

phenotype1,simulation,phenotype2,r
<chr>,<chr>,<chr>,<dbl>
Trait_01,100,Trait_01,1.0
Trait_01,100,Trait_02,0.7117533


## Heritability

In [6]:
# Collect per-trait heritability for every simulation directory matching the
# h = 0.01, s = 0.5, p = 0.0 naming pattern. The values are the diagonal
# entries of the matrix stored in gcov/g_0.05.tsv.
# NOTE(review): using the diagonal directly as h2 presumably relies on the
# phenotypic variance of each trait being 1 -- confirm against the simulation
# code.
sim_h2_df <- fs::dir_ls(
    "../../1.simulation/data/simulations/",
    glob = "*sim_*_h_0.01_s_0.5_p_0.0"
) %>%
    map_chr(function(sim_dir) {
        fs::path_join(c(sim_dir, "gcov", "g_0.05.tsv"))
    }) %>%
    # Skip simulation directories that do not contain the expected file.
    keep(fs::file_exists) %>%
    vroom::vroom(
        id = "source",
        delim = "\t",
        show_col_types = FALSE,
        .name_repair = "unique_quiet"
    ) %>%
    # The first column gets the repaired name `...1`; its values are used as
    # the row phenotype label.
    rename(phenotype1 = `...1`) %>%
    # The simulation number is the run of digits following "sim_" in the path.
    mutate(simulation = str_extract(source, "(?<=sim_)[0-9]+")) %>%
    select(-source) %>%
    pivot_longer(
        starts_with("Trait_"),
        names_to = "phenotype2",
        values_to = "h2"
    ) %>%
    # Keep only the diagonal entries (a trait paired with itself).
    filter(phenotype1 == phenotype2) %>%
    select(simulation, phenotype = phenotype1, h2)

# Persist the table for plotting.
sim_h2_df %>% write_tsv("plot_data/sim_heritability.tsv")

# Preview the first rows.
sim_h2_df %>% head(2)

simulation,phenotype,h2
<chr>,<chr>,<dbl>
100,Trait_01,0.016275
100,Trait_02,0.009196


## Genetic correlation

In [7]:
# Collect the genetic correlation between every pair of traits for every
# simulation directory matching the h = 0.01, s = 0.5, p = 0.0 naming pattern.
# Each simulation's matrix in gcov/g_0.05.tsv is converted from covariance to
# correlation and then reshaped into long format.
sim_rg_df <- fs::dir_ls(
    "../../1.simulation/data/simulations/",
    glob = "*sim_*_h_0.01_s_0.5_p_0.0"
) %>%
    map_chr(function(sim_dir) {
        fs::path_join(c(sim_dir, "gcov", "g_0.05.tsv"))
    }) %>%
    # Skip simulation directories that do not contain the expected file.
    keep(fs::file_exists) %>%
    vroom::vroom(
        id = "source",
        delim = "\t",
        show_col_types = FALSE,
        .name_repair = "unique_quiet"
    ) %>%
    # The first column gets the repaired name `...1`; its values are used as
    # the row phenotype label.
    rename(phenotype1 = `...1`) %>%
    # The simulation number is the run of digits following "sim_" in the path.
    mutate(simulation = str_extract(source, "(?<=sim_)[0-9]+")) %>%
    select(-source) %>%
    # Convert from covariance to correlation matrix, one simulation at a time:
    # nest each simulation's rows, rebuild the matrix with trait names as
    # row names, apply cov2cor(), and turn the result back into a data frame.
    nest(data = -simulation) %>%
    mutate(
        data = map(data, function(cov_df) {
            cov_df %>%
                as.data.frame() %>%
                column_to_rownames("phenotype1") %>%
                as.matrix() %>%
                cov2cor() %>%
                as.data.frame() %>%
                rownames_to_column("phenotype1")
        })
    ) %>%
    unnest(data) %>%
    pivot_longer(
        starts_with("Trait_"),
        names_to = "phenotype2",
        values_to = "rg"
    ) %>%
    # Keep each unordered pair once and drop the diagonal; this relies on the
    # string ordering of the trait names.
    filter(phenotype1 < phenotype2)

# Persist the table for plotting.
sim_rg_df %>% write_tsv("plot_data/sim_genetic_correlation.tsv")

# Preview the first rows.
sim_rg_df %>% head(2)

simulation,phenotype1,phenotype2,rg
<chr>,<chr>,<chr>,<dbl>
100,Trait_01,Trait_02,0.8101352
100,Trait_01,Trait_03,0.4700921
