In [26]:
# Work from the project directory so that the relative paths used by the
# source() calls in this notebook resolve.
setwd("/frazer01/projects/GTEx_v7/analysis/eqtls_deconvolution")

In [27]:
# Load the liver_cells input scripts in dependency order: packages first,
# then file locations, helper functions, input data and finally metadata.
# source() evaluates each file in the global environment (local = FALSE),
# exactly as a top-level call would.
invisible(lapply(
  c("packages.R", "input_files.R", "functions.R", "input_data.R", "load_metadata.R"),
  function(script_name) source(file.path("qtls/liver_cells/input", script_name))
))


Loading packages...
Loading input files...
Loading functions...
Loading input data...


In [None]:
# Hard-link this notebook into the project's analysis/ folder.
# Warnings (e.g. the link already exists) are deliberately silenced and the
# logical result is discarded.
invisible(suppressWarnings(
  file.link(
    "/home/matteo/notebooks/eqtls_deconvolution_gtex/permute_covariates.ipynb",
    "analysis/permute_covariates.ipynb"
  )
))


In [28]:
# Create the output tree for the permutation analysis. The parent "perm"
# directory is created first, then its sub-directories; directories that
# already exist are left alone without a warning.
invisible(lapply(
  c("perm", file.path("perm", c("covariates", "qtls", "analysis", "input", "sh"))),
  dir.create,
  showWarnings = FALSE
))

In [87]:
# Load the liver_cells eGene table as a plain data.frame. The cells below
# read its `gene_id` and `id` columns.
qtls <- fread(
  "analysis//egenes.liver_cells.txt",
  header     = TRUE,
  sep        = "\t",
  data.table = FALSE
)
# Disabled filter: uncomment to keep only rows flagged as eGenes.
# qtls <- qtls[qtls$egene == TRUE, ]

In [91]:
# Print the current working directory; every relative path in this notebook
# is resolved against it.
getwd()

# Permute covariates

In [88]:
# Shuffle the assay IDs of a covariate table and write the permuted table.
#
# Only the `assay_id` column is permuted; all other columns keep their row
# order, so each set of covariate values ends up attached to a random assay.
#
# Arguments:
#   ii               permutation index, used in the output file name
#   covariates_assay data.frame with an `assay_id` column
#
# Side effect: writes perm/input/covariates_assay.<ii>.txt (tab-separated,
# with header, no row names, unquoted).
# Returns: the (invisible NULL) value of write.table().
permute_covariates = function(ii, covariates_assay)
{
    # Shuffle by index rather than sample(x): sample() interprets a single
    # numeric value x >= 1 as 1:x, which breaks for a one-row table with
    # numeric IDs. For longer inputs this draws the same random numbers as
    # sample(x) did.
    shuffled_rows             = sample.int(length(covariates_assay$assay_id))
    covariates_assay$assay_id = covariates_assay$assay_id[shuffled_rows]
    
    out_file = paste("perm/input", paste("covariates_assay", ii, "txt", sep = "."), sep = "/")
    
    write.table(covariates_assay, file = out_file, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
}

# Write 1000 independently shuffled covariate tables (permutation indices
# 1 to 1000), in order, discarding the return values.
invisible(lapply(seq_len(1000), permute_covariates, covariates_assay = covariates_assay))

# Get genotype data

In [89]:
# Fetch the genotype row of one gene's variant from that gene's genotype file.
#
# Arguments:
#   ii    row index into `qtls`
#   qtls  data.frame with columns `gene_id` and `id` (variant identifier)
#
# Returns: data.frame (no header, columns V1, V2, ...) holding the line(s) of
# input/genotypes/gt_data.<gene_id>.txt whose first field equals the variant
# ID exactly.
get_gt = function(ii, qtls)
{
    gene_id = qtls[ii, "gene_id"]
    id      = qtls[ii, "id"     ]
    gt_file = paste("input/genotypes", paste("gt_data", gene_id, "txt", sep = "."), sep = "/")
    
    #message(paste(ii, gene_id))
    
    # grep only pre-filters the file so fread does not have to parse all of it.
    # -F matches the ID as a literal string (not a regex), -e protects IDs that
    # start with "-", and shQuote() keeps the shell from interpreting any
    # character in the ID or the path.
    gtdata = fread(cmd = paste("grep -F -e", shQuote(as.character(id)), shQuote(gt_file)), header = FALSE, data.table = FALSE)
    
    # The literal search can still hit longer IDs that contain this one as a
    # substring, so keep exact matches on the first column only.
    gtdata = gtdata[gtdata$V1 == id,]
    return(gtdata)
    
}

# Column names come from the header line of the first gene's genotype file;
# the first column holds the variant ID and is renamed "id".
subject_ids = as.character(fread(cmd = paste("head", "-n 1", shQuote(paste("input/genotypes", paste("gt_data", qtls[1,"gene_id"], "txt", sep = "."), sep = "/"))), header = FALSE, data.table = FALSE))
subject_ids[[1]] = "id"

# One genotype row per row of `qtls`, stacked in the same order.
# seq_len() (instead of 1:nrow) iterates zero times when `qtls` is empty.
gtdata = as.data.frame(rbindlist(lapply(seq_len(nrow(qtls)), function(ii){get_gt(ii, qtls)})), stringsAsFactors = FALSE)
colnames(gtdata) = subject_ids

# Row names are assigned by position below, so make sure every variant was
# found exactly once and in order; otherwise genes would silently be paired
# with the wrong genotype row.
if (!identical(as.character(gtdata[["id"]]), as.character(qtls$id)))
{
    stop("Genotype rows retrieved from input/genotypes do not match qtls$id one-to-one (missing or duplicated variant).", call. = FALSE)
}

rownames(gtdata) = qtls$gene_id
gtdata$id        = NULL

# row.names = TRUE stores the gene IDs as an unnamed first column.
fwrite(gtdata, "perm/input/gtdata.txt", quote = FALSE, sep = "\t", row.names = TRUE, col.names = TRUE)

# Run eQTLs

In [86]:
# Fit the `vars0` mixed model for one gene against its variant's genotype and
# return the association statistics with a Bonferroni-adjusted p-value.
#
# Arguments:
#   gene_id            gene to test; also the row name used to look up the
#                      genotype in `gtdata` and the stem of the phenotype file
#                      input/phenotypes/<gene_id>.txt
#   gtdata             data.frame of genotypes, rows = genes, columns = assay IDs
#   meta               sample metadata with an `assay_id` column
#   covariates_assay   per-assay covariates, merged into the model table
#   covariates_subject per-subject covariates, merged into the model table
#   vars0              character vector of model terms; the model is
#                      norm ~ <vars0 joined by "+">
#   vars1, compare, type, n_perm
#                      accepted for interface compatibility, not used here
#
# Returns: the output of get_lmm_pval() with an added `bonferroni` column.
run_lmm_by_variant_perm = function(gene_id, gtdata, meta, covariates_assay, covariates_subject, vars0, vars1, compare = FALSE, type = "", n_perm = 0)
{
    expdata           = fread(paste("input/phenotypes", paste(gene_id, "txt", sep = "."), sep = "/"), header = TRUE, sep = "\t", data.table = FALSE)
    rownames(expdata) = expdata$sample_id
    meta              = meta   [ meta$assay_id %in% intersect(rownames(expdata), colnames(gtdata)),]
    
    # Restrict the expression table to the shared samples and attach the
    # genotype by sample ID. Assigning it by position (in `meta` order) would
    # misalign, or silently recycle, whenever the phenotype file has extra
    # samples or a different sample order than `meta`.
    input             = expdata[rownames(expdata) %in% meta$assay_id, , drop = FALSE]
    input$gt          = as.numeric(gtdata[gene_id, rownames(input)])
    input$assay_id    = rownames(input)
    input             = merge(input, meta)
    input             = merge(input, covariates_assay)
    input             = merge(input, covariates_subject)
    input             = input[order(input$wgs_id),]
    input$sex         = as.numeric(factor(input$sex   ))
    
    lmm0   = suppressMessages(suppressWarnings(lmer(paste("norm", paste(vars0, collapse = "+"), sep = "~"), data = input, REML = FALSE)))
    out_lm = get_lmm_pval(lmm0, nrow(input), gene_id, gene_id)
    
    # Bonferroni factor = number of lines in the gene's genotype file.
    # trimws() + whitespace split copes with `wc` implementations that pad the
    # count with leading blanks; shQuote() protects the path from the shell.
    # NOTE(review): the count includes the file's header line, if it has one -
    # confirm this matches the correction used in the main eQTL analysis.
    gt_file = paste("input/genotypes", paste("gt_data", gene_id, "txt", sep = "."), sep = "/")
    wc_out  = system(paste("wc -l", shQuote(gt_file)), intern = TRUE)
    n_tests = as.numeric(strsplit(trimws(wc_out), split = "[[:space:]]+")[[1]][[1]])
    
    out_lm$bonferroni = min(c(1, out_lm$pval * n_tests))
    
    return(out_lm)
}

# Run the eQTL model for every gene using one permuted covariate table and
# save the combined results.
#
# Arguments:
#   perm  permutation index; selects perm/input/covariates_assay.<perm>.txt
#         and names the output perm/qtls/qtls.<perm>.txt
#
# Reads the globals `metadata`, `covariates_subject`, `vars0_assay` and
# `vars1_assay`. Writes a tab-separated table with one extra `perm` column.
run_permute_qtls <- function(perm = 1) {
  message(perm)

  shuffled_covariates <- fread(
    file.path("perm/input", paste0("covariates_assay.", perm, ".txt")),
    sep = "\t", header = TRUE, data.table = FALSE
  )
  genotypes <- fread(
    "perm/input/gtdata.txt",
    sep = "\t", header = TRUE, data.table = FALSE
  )
  # The first (unnamed, hence "V1") column holds the gene IDs.
  rownames(genotypes) <- genotypes$V1

  # Keep samples that have both a genotype column and a permuted covariate
  # row, sorted by WGS ID.
  has_genotype   <- metadata$wgs_id %in% colnames(genotypes)
  has_covariates <- metadata$assay_id %in% shuffled_covariates$assay_id
  samples        <- metadata[has_genotype & has_covariates, ]
  samples        <- samples[order(samples$wgs_id), ]

  # Re-key the genotype columns from WGS ID to assay ID.
  genotypes           <- genotypes[, samples$wgs_id]
  colnames(genotypes) <- samples$assay_id

  fit_gene <- function(gene_id) {
    run_lmm_by_variant_perm(
      gene_id, genotypes, samples, shuffled_covariates, covariates_subject,
      vars0_assay, vars1_assay,
      compare = FALSE, type = ""
    )
  }
  results      <- as.data.frame(rbindlist(lapply(rownames(genotypes), fit_gene)), stringsAsFactors = FALSE)
  results$perm <- perm

  fwrite(
    results,
    file = file.path("perm/qtls", paste0("qtls.", perm, ".txt")),
    quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE
  )
}

# Trial run in this session: process permutations 1 and 2 only.
lapply(1:2, run_permute_qtls)

1
2


# Run qsub

qsub -pe smp 1 -t 1-1000 -tc 1000 -o /frazer01/projects/GTEx_v7/analysis/eqtls_deconvolution/qtls/liver_cells/perm/sh/perm.out -e /frazer01/projects/GTEx_v7/analysis/eqtls_deconvolution/qtls/liver_cells/perm/sh/perm.err /frazer01/projects/GTEx_v7/analysis/eqtls_deconvolution/analysis/permute_covariates.sh