# Initialize QTL analyses
- select phenotype (rna, atac, chip)
- create output folders
- select samples
- select interaction terms
- V02: run single studies


In [1]:
# Work from the project analysis directory: the relative paths used in the
# cells below ("analysis/...", "input/...") are resolved against it.
setwd("/frazer01/projects/GTEx_v7/analysis/eqtls_deconvolution")

In [2]:
# Hard-link this notebook into the project's analysis/ folder. Warnings are
# silenced and the logical result of file.link() is discarded.
invisible(suppressWarnings(file.link(
    from = "/frazer01/home//matteo/notebooks/eqtls_deconvolution_gtex/initialize_qtl_analyses.ipynb",
    to   = "analysis/initialize_qtl_analyses.ipynb"
)))

# Source the shared pipeline scripts, in this order: packages, input files,
# functions, input data, metadata.
invisible(lapply(
    c(
        "analysis/cardiac_qtls_packages.R",
        "analysis/cardiac_qtls_input_files.R",
        "analysis/cardiac_qtls_functions.R",
        "analysis/cardiac_qtls_input_data.R",
        "analysis/cardiac_qtls_load_metadata.R"
    ),
    source
))


Loading packages...
Loading input files...
Loading functions...
Loading input data...
Loading metadata...


# Run samples by tissue

In [3]:
# Set up and launch the QTL analysis for the samples of a single tissue.
#
# Arguments:
#   analysis_name  name of the analysis; forwarded to initialize_qtl_analysis()
#   tissue         value matched against metadata$tissue; also part of the
#                  input file names
#   phenotype      phenotype label (called with "rna" in this notebook); also
#                  part of the input file names
#   vars0_assay    forwarded unchanged to initialize_qtl_analysis()
#   vars1_assay    forwarded unchanged to initialize_qtl_analysis()
#
# Reads the globals `covariates_rna` and `metadata`.
# Returns whatever run_qsub() returns.
run_by_tissue <- function(analysis_name, tissue, phenotype, vars0_assay, vars1_assay) {
    message(paste("Preparing", tissue, "..."))

    # Assay ids present in covariates_rna that belong to this tissue's RNA samples.
    tissue_rna_ids <- metadata[metadata$tissue == tissue, "rna_id"]
    sample_list    <- covariates_rna[covariates_rna$assay_id %in% tissue_rna_ids, "assay_id"]

    # Inputs are named input/phenotypes/<phenotype>_<tissue>; the list of
    # phenotype ids to test lives in the matching "_list.txt" file.
    phenotype_prefix <- paste0("input/phenotypes/", phenotype, "_", tissue)
    gene_list        <- read.table(paste0(phenotype_prefix, "_list.txt"), header = FALSE, check.names = FALSE)$V1

    init <- initialize_qtl_analysis(
        analysis_name, phenotype, phenotype_prefix, sample_list, gene_list, vars0_assay, vars1_assay,
        qtl_distance          = 1e6,
        maf_threshold         = 0.05,
        phenotype_min_value   = 0.5,
        phenotype_min_samples = 0.1,
        n_perm                = 1,
        primary               = TRUE
    )

    message(paste("Genes to analyze", init$n_genes, sep = " = "))

    run_qsub(init$folder, init$n_genes, run_qsub = TRUE, queue = "week")
}

# Cell-type covariate sets. In the calls below each set is appended to the
# covariate list and crossed with genotype as "gt:<cell type>".
liver_cells  <- c("endothelial", "hepatocyte", "kupffer_cell", "nk_cell")
liver2_cells <- c(
    "endothelial_venous", "gdt", "hepatocyte0", "hepatocyte3", "hepatocyte4",
    "macrophage", "nkt_cell", "endothelial_periportal"
)
# liver3: every column of covariates_rna whose name contains "merged_".
liver3_cells <- grep("merged_", colnames(covariates_rna), value = TRUE)
skin_cells   <- c(
    "outer_bulge", "inner_bulge", "epidermis_basal",
    "epidermis", "epidermis_stem_cell", "leukocyte"
)
skin2_cells  <- c("collapsed_leukocyte", "collapsed_fibroblast", "collapsed_keratinocyte")

# One call per analysis; only the uncommented call is executed.
#run_by_tissue("liver_original", "liver", "rna", c("gt", "age", "sex", paste("PC", 1:5, sep = ""), "(1|sex)"                ), c("gt:age"))
#run_by_tissue("liver_cells"   , "liver", "rna", c("gt", "age", "sex", paste("PC", 1:5, sep = ""), "(1|sex)"   , liver_cells), paste("gt", liver_cells, sep = ":"))
#run_by_tissue("liver2_cells"   , "liver", "rna", c("gt", "age", "sex", paste("PC", 1:5, sep = ""), "(1|sex)"   , liver2_cells), paste("gt", liver2_cells, sep = ":"))
#run_by_tissue("liver3_cells"   , "liver", "rna", c("gt", "age", "sex", paste("PC", 1:5, sep = ""), "(1|sex)"   , liver3_cells), paste("gt", liver3_cells, sep = ":"))
#run_by_tissue("skin_original" , "skin" , "rna", c("gt", "age", "sex", paste("PC", 1:5, sep = ""), "(1|wgs_id)"             ), c("gt:age"))
#run_by_tissue("skin_cells"    , "skin" , "rna", c("gt", "age", "sex", paste("PC", 1:5, sep = ""), "(1|wgs_id)", skin_cells ), paste("gt", skin_cells , sep = ":"))
run_by_tissue(
    "skin2_cells", "skin", "rna",
    c("gt", "age", "sex", paste0("PC", 1:5), "(1|wgs_id)", skin2_cells),
    paste("gt", skin2_cells, sep = ":")
)


Preparing skin ...
Genes to analyze = 24098
Running qsub


In [7]:
# Report progress for one analysis; uncomment the line for the analysis of
# interest. The recorded output lists genes analyzed, genes still to analyze
# and the number of LMMs compared.
#monitor_qtls("liver_original")
#monitor_qtls("liver_cells"   )
#monitor_qtls("liver2_cells"  )
#monitor_qtls("liver3_cells"  )
#monitor_qtls("skin_original" )
#monitor_qtls("skin_cells"    )
monitor_qtls("skin2_cells"    )


2019-08-27 09:17:32
skin2_cells
Genes analyzed = 24029
Genes to analyze = 69
LMMs compared = 13257


In [8]:
# Merge the results stored under <project>/qtls/<analysis name>; only the
# uncommented call is executed. The recorded output reports the number of
# tested genes and eGenes for the merged analysis.
#merge_qtls(paste("/frazer01/projects/GTEx_v7/analysis/eqtls_deconvolution", "qtls", "liver_original", sep = "/"), "liver_original")
#merge_qtls(paste("/frazer01/projects/GTEx_v7/analysis/eqtls_deconvolution", "qtls", "liver_cells"   , sep = "/"), "liver_cells"   )
#merge_qtls(paste("/frazer01/projects/GTEx_v7/analysis/eqtls_deconvolution", "qtls", "liver2_cells"  , sep = "/"), "liver2_cells"  )
#merge_qtls(paste("/frazer01/projects/GTEx_v7/analysis/eqtls_deconvolution", "qtls", "liver3_cells"  , sep = "/"), "liver3_cells"  )
#merge_qtls(paste("/frazer01/projects/GTEx_v7/analysis/eqtls_deconvolution", "qtls", "skin_original" , sep = "/"), "skin_original" )
#merge_qtls(paste("/frazer01/projects/GTEx_v7/analysis/eqtls_deconvolution", "qtls", "skin_cells"    , sep = "/"), "skin_cells"    )
merge_qtls(
    file.path("/frazer01/projects/GTEx_v7/analysis/eqtls_deconvolution", "qtls", "skin2_cells"),
    "skin2_cells"
)
#

skin2_cells
Tested genes = 24029
eGenes = 11497



In [7]:
# Ad-hoc ratio calculations.
# NOTE(review): the origin of these counts is not recorded in this notebook;
# label them or remove the cell.
7840/1369
10177/1369
12011/9232

In [10]:
# Build (and print) the path of the liver_original QTL folder.
file.path("/frazer01/projects/CARDIPS/analysis/eqtls_deconvolution", "qtls", "liver_original")

# All samples, ATAC-seq peaks; the interaction term is gt:population1

In [137]:
# Analysis and phenotype share the same label; later cells read both globals.
analysis_name <- "atac_summits"
phenotype     <- "atac_summits"

# Info table for the phenotypes; the same path is also handed to
# initialize_qtl_analysis() as geneinfo_file.
geneinfo_file <- "input//phenotypes/atac_summits_info_exp.txt"
geneinfo      <- read.table(geneinfo_file, header = TRUE)

# Samples (all ATAC ids in the metadata) and the phenotype ids to test.
sample_list <- metadata$atac_id
gene_list   <- read.table(paste0("input/phenotypes/", analysis_name, "_list.txt"), header = FALSE, check.names = FALSE)$V1

# Terms of the base LMM formula.
vars0_assay <- c("gt", "population1", "sex", paste0("PC", 1:10), "(1|wgs_id)", "(1|family_id)")
# Interaction terms, tested against the LMM without interactions.
vars1_assay <- c("gt:population1")

initialize <- initialize_qtl_analysis(
    analysis_name, phenotype, "input/phenotypes/atac_summits", sample_list, gene_list, vars0_assay, vars1_assay,
    qtl_distance          = 1e5,
    maf_threshold         = 0.01,
    phenotype_min_value   = 1,
    phenotype_min_samples = 0.2,
    geneinfo_file         = geneinfo_file,
    n_perm                = 2,
    primary               = TRUE
)

qtl_folder <- initialize$folder
n_genes    <- initialize$n_genes

message(paste("Genes to analyze", n_genes, sep = " = "))

# Called with run_qsub = FALSE here; the commented variant below passes
# run_qsub = TRUE with 10 in place of n_genes.
run_qsub(qtl_folder, n_genes, run_qsub = FALSE, queue = "opt", tc = 1000)
#run_qsub(qtl_folder, 10     , run_qsub = TRUE , queue = "opt", tc = 1000)


Genes to analyze = 328772


In [162]:
# Report progress for the analysis named by the global `analysis_name`
# ("atac_summits" in the recorded output).
monitor_qtls(analysis_name)


2019-02-12 14:46:40
atac_summits
Genes analyzed = 24677
Genes to analyze = 304095
LMMs compared = 5008


In [156]:
# Merge the results of `analysis_name` and keep them in `qtls` for later cells.
# NOTE(review): this reads from the CARDIPS/analysis/cardiac_qtls tree, not the
# GTEx_v7 directory passed to setwd() at the top of the notebook; confirm the
# path is intended.
qtls <- merge_qtls(
    file.path("/frazer01/projects/CARDIPS/analysis/cardiac_qtls", "qtls", analysis_name),
    phenotype
)

    

atac_summits: Tested genes = 14012


In [9]:
# Inspect the structure of the merged QTL object, then pull out its
# lmm_compare element.
# NOTE(review): the recorded output is "object 'qtls' not found"; `qtls` is
# assigned by the merge_qtls() cell, which must be run first in the same session.
str(qtls)
lmm_compare = qtls$lmm_compare

ERROR: Error in str(qtls): object 'qtls' not found


In [144]:
# Name of the p-value column handed to fdr_by_type().
min_pval <- "min_pval"

# Run fdr_by_type() once per distinct value of lmm_compare$type (in sorted
# order) and stack the per-type results back into a single table.
lmm_compare <- do.call(
    "rbind",
    lapply(
        sort(unique(lmm_compare$type)),
        function(qtl_type) fdr_by_type(lmm_compare, min_pval, qtl_type)
    )
)

# Rows whose fdr exceeds 0.1 get "gt" as their `best` value.
# NOTE(review): if lmm_compare is a data.frame and fdr contains NA, this
# assignment stops with an error; decide how NA rows should be handled.
lmm_compare[lmm_compare$fdr > 0.1, "best"] <- "gt"
