In [1]:
suppressPackageStartupMessages(require(tidyverse))
suppressPackageStartupMessages(require(data.table))


In [2]:
phe_dir <- '/oak/stanford/groups/mrivas/projects/biomarkers/covariate_corrected'
phe_f   <- file.path(phe_dir, 'phenotypes/biomarkers_with_egfr_fastingglucose_nonalbumin.phe')
covar_f <- file.path(phe_dir, 'outputExtendedNoTDIreduced/phenotypes/full.table.combined.phe')


In [3]:
covars <- fread('covars.sex-indep.txt', head=F) %>% pull()

In [4]:
phe_df   <- fread(phe_f)
covar_df <- fread(covar_f)


In [12]:
all_df <- phe_df %>% 
select('IID', 'f.30850.0.0') %>% 
rename('Testosterone' = 'f.30850.0.0') %>% 
mutate(Testosterone = log10(Testosterone)) %>%
inner_join(
    covar_df %>% 
    mutate(ageBin_FastingTime = ageBin * FastingTime) %>%
    select(c('FID', 'IID', covars)),
    by='IID'
)


In [13]:
all_df %>% dim()


In [15]:
all_df %>% select(Testosterone) %>% summary()

  Testosterone  
 Min.   :-0.46  
 1st Qu.: 0.01  
 Median : 0.61  
 Mean   : 0.53  
 3rd Qu.: 1.07  
 Max.   : 1.74  
 NA's   :43736  

## read the data used in v1

- Use the same set of individuals used in v1


In [8]:
read_keep <- function(name){
    keep_dir <- '/scratch/PI/mrivas/users/erflynn/sex_div_gwas/phefiles'
    df <- fread(file.path(keep_dir, paste0(name, '.keep')))
    colnames(df) <- c('FID')
    df %>% mutate(IID = FID, keep = name)
}

read_phe <- function(){
    phe_dir <- '/oak/stanford/groups/mrivas/projects/biomarkers/covariate_corrected'
    phe_suffix <- 'phenotypes/residual/Testosterone.phe'
    
    bind_rows(lapply(c(
        'outputExtendedBMIreducedMaleWhiteBritish',
        'outputExtendedBMIreducedFemaleWhiteBritish'
    ), function(x){
        fread(file.path(phe_dir, x, phe_suffix))%>%
        filter(! IID %in% (t_med %>% select(IID) %>% pull())) %>%
        drop_na()
    }))
}

t_med <- fread('/scratch/PI/mrivas/users/erflynn/sex_div_gwas/data/t_related.txt')
t_med %>% dim() %>% print()

t_med %>% gather("med", "val", -IID) %>% filter(val > 0) %>% 
select(IID) %>% unique() %>% dim() %>% print()

t_phe <- read_phe()
t_phe %>% dim() %>% print()

df <- bind_rows(lapply(c('pre_meno', 'post_meno', 'onesex', 'zerosex'), function(x){
    read_keep(x) %>% 
    inner_join(t_phe, by=c('FID', 'IID'))
}))

df %>% select(FID) %>% unique() %>% dim() %>% dim()
df %>% count(keep) %>% print()


[1] 2766    7
[1] 2766    1
[1] 365682      3


NULL

[90m# A tibble: 4 x 2[39m
  keep           n
  [3m[90m<chr>[39m[23m      [3m[90m<int>[39m[23m
[90m1[39m onesex    [4m1[24m[4m8[24m[4m2[24m552
[90m2[39m post_meno  [4m9[24m[4m8[24m816
[90m3[39m pre_meno   [4m4[24m[4m1[24m291
[90m4[39m zerosex   [4m1[24m[4m8[24m[4m3[24m130


In [9]:
df %>% head(0)

FID,IID,keep,Testosterone
<dbl>,<dbl>,<chr>,<dbl>


In [16]:
v2_dfs <- list()
for (k in c('pre_meno', 'post_meno', 'onesex', 'zerosex')){
    v2_dfs[[k]] <- df %>% filter(keep == k) %>% 
    select(-keep, -Testosterone) %>%
    inner_join(
        all_df %>% select(c('FID', 'IID', 'Testosterone', covars)), 
        by=c('FID', 'IID')
    ) %>% 
    select(c('FID', 'IID', 'Testosterone', covars))
}


In [28]:
v2_dfs[['combined']] <- bind_rows(v2_dfs[['onesex']] %>% mutate(sex = 1), v2_dfs[['zerosex']] %>% mutate(sex = 0))


In [29]:
for (k in names(v2_dfs)){
    print(k)
    v2_dfs[[k]] %>% dim() %>% print()
}


[1] "pre_meno"
[1] 41291   116
[1] "post_meno"
[1] 98816   116
[1] "onesex"
[1] 182552    116
[1] "zerosex"
[1] 183130    116
[1] "combined"
[1] 365682    117


In [31]:
for (k in names(v2_dfs)){
    print(k)
    v2_dfs[[k]] %>% drop_na() %>% dim() %>% print()
}


[1] "pre_meno"
[1] 41291   116
[1] "post_meno"
[1] 98816   116
[1] "onesex"
[1] 182552    116
[1] "zerosex"
[1] 183130    116
[1] "combined"
[1] 365682    117


In [46]:
fit_lm <- function(df, phenotype, covariates){
    form <- stats::as.formula(paste(phenotype, ' ~ ', paste(c(1, covariates), collapse = " + ")))
    fit <- lm(form, data=df) 
    return(fit)

}

In [47]:
lm_models <- list()
for(k in c('onesex', 'zerosex')){
    lm_models[[k]] <- fit_lm(v2_dfs[[k]], 'Testosterone', covars)
}
for(k in c('combined')){
    lm_models[[k]] <- fit_lm(v2_dfs[[k]], 'Testosterone', c('sex', covars))
}


In [57]:
for(k in c('onesex', 'zerosex', 'combined')){
    v2_dfs[[k]] <- v2_dfs[[k]] %>% mutate(
        Testosterone_residuals = (lm_models[[k]])$residuals
    )    
}


In [59]:
phe_data_dir <- '/oak/stanford/groups/mrivas/users/ytanigaw/repos/rivas-lab/sex-div-analysis/snpnet/phe_data/v2'
for(k in c('onesex', 'zerosex', 'combined')){
    v2_dfs[[k]] %>% 
    fwrite(file.path(phe_data_dir, paste0(k, '.phe')), sep='\t', )
}


In [66]:
for(k in c('onesex', 'zerosex', 'combined')){
    print(k)
    save_RD <- file.path(phe_data_dir, paste0(k, '.lm.RData'))
    save_lm <- lm_models[[k]]
    save(save_lm, file = save_RD)
}


[1] "onesex"
[1] "zerosex"
[1] "combined"


In [73]:
for(k in c('onesex', 'zerosex', 'combined')){
    print(k)
    data.frame(summary(lm_models[[k]])$coefficient) %>%
    fwrite(file.path(phe_data_dir, paste0(k, '.lm.coeff.tsv')), sep='\t', row.names=T)
}


[1] "onesex"
[1] "zerosex"
[1] "combined"
