# GWAS for parents' survival
## Initialize definitions

In [17]:
source(here::here("code/init.R"))
source(here::here("code/gwas.R"))
#options(gmax.data.size = 1e9)
library(gwiser) 

## Define parents' survival phenotype

In [2]:
# Read the parents' survival table from `output/ukbb_parents.csv`
# (path resolved by `here`). Later cells use its `id`, `mfollow_time`,
# `mdead`, `ffollow_time` and `fdead` columns.
parents_survival <- here::here("output/ukbb_parents.csv") %>%
    tgutil::fread()

# Preview the first rows.
head(parents_survival)


Unnamed: 0_level_0,id,mother_age_at_death,mother_last_alive,father_age_at_death,father_last_alive,mdead,mfollow_time,fdead,ffollow_time
Unnamed: 0_level_1,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<int>,<lgl>,<int>
1,1000019,inf,87.0,49.0,inf,False,87.0,True,49
2,1000022,inf,75.0,inf,78.0,False,75.0,False,78
3,1000035,inf,inf,inf,87.0,False,,False,87
4,1000046,60.0,inf,60.0,inf,True,60.0,True,60
5,1000054,inf,74.0,83.0,inf,False,74.0,True,83
6,1000063,52.0,inf,72.0,inf,True,52.0,True,72


In [3]:
# Load the disease scores in long format and reshape to wide:
# one row per (id, age, sex) with one `score_norm` column per `disease`.
# NOTE(review): `spread()` is superseded by `tidyr::pivot_wider()`; it is kept
# here because the two can differ in row/column ordering.
scores <- spread(
    select(
        data.table::fread(here::here("output/disease_score_inverse_rank.tsv")),
        id, age, sex, disease, score_norm
    ),
    disease,
    score_norm
)

# Preview the first rows.
head(scores)

id,age,sex,ckd,copd,diabetes,liver,ncvd
<int>,<int>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1000019,60,female,0.6888426,0.40155519,-0.2422354,0.2831111,0.37106
1000022,50,female,1.6588384,1.29550119,1.5858586,1.718382,1.6632586
1000035,60,male,1.1588003,1.64335532,1.7567751,0.556867,1.616371
1000046,70,female,-0.5749957,-0.01139192,-1.2153981,-0.683464,1.573662
1000054,45,female,-1.154321,-0.58443457,-0.8761561,-0.6001504,-0.8822776
1000063,65,male,1.4177173,2.05214399,-0.2529519,0.6911006,1.3997413


### Loading PCA and genes

In [4]:
# Principal components table; it is joined onto the survival table by `id`
# further down (the recorded log reports "Loading precomputed PCA").
pca <- get_ukbb_pca()
# Genotype object (the recorded log reports imputed genotypes). Later cells
# read `genes$fam$sample.ID` (sample ids) and `genes$map` (marker annotation).
genes <- get_imputed_genes()

[36mi[39m Loading precomputed PCA

Loading preprocessed genetic data (imputed genotypes)



In [5]:
# Vector of participant ids read from the `id` column of
# `output/ukbb_white.british_patients.csv`
# (presumably the white-British subset, judging by the file name).
wb_patients <- here("output/ukbb_white.british_patients.csv") %>%
    fread() %>%
    .[["id"]]

In [6]:
# Keep only participants that appear in all three sources (the `wb_patients`
# id list, the disease scores and the genotype sample list), then attach the
# disease scores and the principal components.
#
# NOTE(review): both joins rely on dplyr's natural-join key detection; the
# recorded output shows they resolve to `by = "id"`. Consider spelling the key
# out if `scores` and `pca` could ever share another column name.
parents_survival <- parents_survival %>%
    filter(
        id %in% wb_patients &
            id %in% scores$id &
            id %in% genes$fam$sample.ID
    ) %>%
    left_join(scores) %>%
    left_join(pca)

# Preview the first rows.
head(parents_survival)

[1m[22mJoining, by = "id"
[1m[22mJoining, by = "id"


Unnamed: 0_level_0,id,mother_age_at_death,mother_last_alive,father_age_at_death,father_last_alive,mdead,mfollow_time,fdead,ffollow_time,age,⋯,PC11,PC12,PC13,PC14,PC15,PC16,PC17,PC18,PC19,PC20
Unnamed: 0_level_1,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<int>,<lgl>,<int>,<int>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1000022,inf,75.0,inf,78.0,False,75.0,False,78,50,⋯,0.3170125,2.6328147,1.7163368,-5.01615392,-1.5772722,-3.564473,-2.2181418,1.5713741,-1.9076169,2.055364
2,1000035,inf,inf,inf,87.0,False,,False,87,60,⋯,-2.0972524,1.4066766,-0.9012911,1.3185494,0.1942449,-6.922619,0.3071414,-2.078771,1.5407592,1.18437
3,1000046,60.0,inf,60.0,inf,True,60.0,True,60,70,⋯,3.0222677,-0.423668,-1.9314079,-0.09110024,-0.5955289,-1.992061,-6.1250101,0.2095399,0.2382121,-2.225729
4,1000063,52.0,inf,72.0,inf,True,52.0,True,72,65,⋯,3.5168629,-0.7654475,0.4362669,0.35964717,-3.3958734,-2.994583,4.4955826,-1.0551477,3.6623219,4.815468
5,1000078,84.0,inf,63.0,inf,True,84.0,True,63,60,⋯,-2.4854311,4.0357319,2.6573689,-3.9186587,0.9580225,1.568436,-0.6186636,1.1179565,-1.518018,-2.572448
6,1000081,78.0,inf,80.0,inf,True,78.0,True,80,60,⋯,-0.3018311,0.1101689,-2.1468426,1.40032702,-2.2095284,1.655876,-4.464475,0.3770999,-2.8147615,3.609816


In [8]:
# Father survival table: `time` is the father's follow-up time, `status` is the
# father's death indicator, followed by every column from `age` through `PC20`.
# Rows with a missing or non-positive follow-up time are dropped, and then any
# row that still contains an NA in the selected columns.
father_survival <- parents_survival %>%
    select(id, time = ffollow_time, status = fdead, age:PC20) %>%
    filter(!is.na(time) & time > 0) %>%
    na.omit()

In [9]:
# Mother survival table: `time` is the mother's follow-up time, `status` is the
# mother's death indicator, followed by every column from `age` through `PC20`.
# Rows with a missing or non-positive follow-up time are dropped, and then any
# row that still contains an NA in the selected columns.
mother_survival <- parents_survival %>%
    select(id, time = mfollow_time, status = mdead, age:PC20) %>%
    filter(!is.na(time) & time > 0) %>%
    na.omit()

In [10]:
# Stack the father and mother tables into one long table, tagging each row
# with the parent it describes (stored as a factor).
both_survival <- bind_rows(
    mutate(father_survival, parent = "father"),
    mutate(mother_survival, parent = "mother")
) %>%
    mutate(parent = factor(parent)) %>%
    # Keep censored rows and deaths at age 40 or later, i.e. drop parents who
    # died before age 40.
    filter(!status | time >= 40) %>%
    # Row key combining participant id and parent, e.g. "<id>.father".
    mutate(id_both = paste0(id, ".", parent))

In [18]:
# Cox GWAS on the stacked father + mother table.
# The braced expression is handed to `%cache_df%` together with the TSV path;
# `%cache_df%` and `%>%` share precedence and associate left-to-right, so
# `as_tibble()` is applied to whatever `%cache_df%` returns.
gwas_both <- {
    df <- run_gwas_cox_both_parents(
        genes,
        both_survival %>% rename(gender = sex),
        null_fn = here("output/cox_parents_survival_both_null"),
        max.jobs = 200,
        use_sge = TRUE
    ) %>%
        # Attach the marker annotation from the genotype map.
        left_join(genes$map, by = "marker.ID") %>%
        rename(chrom = chromosome, start = physical.pos) %>%
        mutate(
            # Prefix with "chr" and turn a leading "chr0" into "chr".
            chrom = gsub("chr0", "chr", paste0("chr", chrom)),
            end = start + 1,
            # NOTE(review): this stores log10(p), not -log10(p) -- confirm the
            # sign convention expected by downstream consumers.
            pval = log10(p.value.spa)
        ) %>%
        select(chrom, start, end, pval, marker.ID, allele1, allele2, everything())
    df
} %cache_df% here("output/cox_parents_survival_both_gwas.tsv") %>% as_tibble()

> Generating Cox NULL model

Using cached rds from '/net/mraid14/export/data/users/nettam/projects/emr/ukbiobank/notebook/output/cox_parents_survival_both_null'

> Running Cox GWAS

> Running [34m[34m13840[34m[39m jobs

[36mi[39m Loading [34m[34mgwiser[34m[39m



[1] "Sample size is 615506."
[1] "Number of variants is 1001."
[1] "Start Analyzing..."
[1] "2022-10-27 14:55:18 IDT"
[1] "Analysis Complete."
[1] "2022-10-27 14:56:56 IDT"


[36mi[39m Loading [34m[34mgwiser[34m[39m



[1] "Sample size is 615506."
[1] "Number of variants is 1001."
[1] "Start Analyzing..."
[1] "2022-10-27 14:57:41 IDT"
[1] "Analysis Complete."
[1] "2022-10-27 14:58:55 IDT"


[36mi[39m Loading [34m[34mgwiser[34m[39m



[1] "Sample size is 615506."
[1] "Number of variants is 1001."
[1] "Start Analyzing..."
[1] "2022-10-27 14:59:37 IDT"
[1] "Analysis Complete."
[1] "2022-10-27 15:01:19 IDT"


[36mi[39m Loading [34m[34mgwiser[34m[39m



[1] "Sample size is 615506."
[1] "Number of variants is 1001."
[1] "Start Analyzing..."
[1] "2022-10-27 15:02:05 IDT"
[1] "Analysis Complete."
[1] "2022-10-27 15:03:53 IDT"


[36mi[39m Loading [34m[34mgwiser[34m[39m



[1] "Sample size is 615506."
[1] "Number of variants is 1001."
[1] "Start Analyzing..."
[1] "2022-10-27 15:04:35 IDT"
[1] "Analysis Complete."
[1] "2022-10-27 15:06:11 IDT"


[36mi[39m Loading [34m[34mgwiser[34m[39m



[1] "Sample size is 615506."
[1] "Number of variants is 1001."
[1] "Start Analyzing..."
[1] "2022-10-27 15:06:53 IDT"
[1] "Analysis Complete."
[1] "2022-10-27 15:08:25 IDT"


[36mi[39m Loading [34m[34mgwiser[34m[39m



In [None]:
# Cox GWAS on the mother survival table.
# The braced expression is handed to `%cache_df%` together with the TSV path;
# `%cache_df%` and `%>%` share precedence and associate left-to-right, so
# `as_tibble()` is applied to whatever `%cache_df%` returns.
gwas_mother <- {
    df <- run_gwas_cox(
        genes,
        mother_survival %>% rename(gender = sex),
        null_fn = here("output/cox_parents_survival_mother_null"),
        max.jobs = 200
    ) %>%
        # Attach the marker annotation from the genotype map.
        left_join(genes$map, by = "marker.ID") %>%
        rename(chrom = chromosome, start = physical.pos) %>%
        mutate(
            # Prefix with "chr" and turn a leading "chr0" into "chr".
            chrom = gsub("chr0", "chr", paste0("chr", chrom)),
            end = start + 1,
            # NOTE(review): this stores log10(p), not -log10(p) -- confirm the
            # sign convention expected by downstream consumers.
            pval = log10(p.value.spa)
        ) %>%
        select(chrom, start, end, pval, marker.ID, allele1, allele2, everything())
    df
} %cache_df% here("output/cox_parents_survival_mother_gwas.tsv") %>% as_tibble()

In [None]:
# Cox GWAS on the father survival table.
# The braced expression is handed to `%cache_df%` together with the TSV path;
# `%cache_df%` and `%>%` share precedence and associate left-to-right, so
# `as_tibble()` is applied to whatever `%cache_df%` returns.
gwas_father <- {
    df <- run_gwas_cox(
        genes,
        father_survival %>% rename(gender = sex),
        null_fn = here("output/cox_parents_survival_father_null"),
        max.jobs = 200
    ) %>%
        # Attach the marker annotation from the genotype map.
        left_join(genes$map, by = "marker.ID") %>%
        rename(chrom = chromosome, start = physical.pos) %>%
        mutate(
            # Prefix with "chr" and turn a leading "chr0" into "chr".
            chrom = gsub("chr0", "chr", paste0("chr", chrom)),
            end = start + 1,
            # NOTE(review): this stores log10(p), not -log10(p) -- confirm the
            # sign convention expected by downstream consumers.
            pval = log10(p.value.spa)
        ) %>%
        select(chrom, start, end, pval, marker.ID, allele1, allele2, everything())
    df
} %cache_df% here("output/cox_parents_survival_father_gwas.tsv") %>% as_tibble()