# eQTL mQTL overlap

How many eQTLs are also mQTLs and vice versa

In [2]:
.libPaths("~/R/x86_64-redhat-linux-gnu-library/3.2/")
# config opts and libraries
options(repr.plot.width = 6)
options(repr.plot.height = 5)
library(ggplot2);
library(plyr);
library(dplyr);
library(reshape2);
library(LSD);
library(pheatmap);
library(parallel);
options(mc.cores = 24);
library(stringr);
library(RColorBrewer);
library(qtl);
library(funqtl);

In [3]:
# load mQTLs
load("/g/steinmetz/brooks/genphen//metabolome/qtls/mQTLs_comball_funqtl_2014.rda")
mQTLs_funqtl_2014_class = sapply(mQTLs_funqtl_2014,class)
mQTLs_funqtl_2014 = mQTLs_funqtl_2014[mQTLs_funqtl_2014_class!="try-error"]



In [4]:
# load eQTLs
load("/g/steinmetz/brooks/genphen/transcriptome/qtl/eQTL_08032016.rda")

In [5]:
# load transcript annotation
load("/g/steinmetz/brooks/genphen/transcriptome/data/trx_df.rda")

In [6]:
# load genotype and markers files
genotype_f = "/g/steinmetz/brooks/yeast/genomes/S288CxYJM789/genotypes_S288c_R64.rda"
load(genotype_f)

In [7]:
# load data
endo_f = "/g/steinmetz/project/GenPhen/data/endometabolome/data/endometabolite_full_12102015.rda"
load(endo_f)

# use only relaitve data
endometabolite = filter(endometabolite, time_format == "relative")

#head(endometabolite)

In [8]:
# load expression data
load("/g/steinmetz/brooks/genphen/transcriptome/data/trx_df.rda")

In [9]:
head(endometabolite)

Unnamed: 0,strain,metabolite,replicate,time_format,time,value,value.log2,relative.log2,derivative.log2,cellconc,biovol,singlecellvol,peakarea,batch
1,01B,AKG,1,relative,1,64.62115,6.036089,1.0,,,,,,dM1
2,01B,AKG,1,relative,2,75.42941,6.256056,1.036442,0.219967,,,,,dM1
3,01B,AKG,1,relative,3,46.9538,5.583573,0.9250316,-0.6724827,,,,,dM1
4,01B,AKG,1,relative,4,57.4146,5.868257,0.9721952,0.2846837,,,,,dM1
5,01B,AKG,2,relative,1,65.08506,6.046252,1.0,,,,,,dM1
6,01B,AKG,2,relative,2,61.94676,5.97606,0.9883908,-0.07019202,,,,,dM1


In [10]:
if (!exists("mQTL_table")) {
    mQTL_table = do.call(rbind, lapply(names(mQTLs_funqtl_2014),function(m) {
        interval = mQTLs_funqtl_2014[[m]]$qtl_intervals
        if (length(interval)>0) {
            do.call(rbind, lapply(seq(1:length(interval)), function(z){
                thisinterval = interval[[z]]
                highmrk = names(thisinterval[2])
                data.frame(metabolite = m, 
                   as.data.frame(range(thisinterval)),
                   maxlod = mQTLs_funqtl_2014[[m]]$qtls_alt[highmrk,"mlod"], 
                   maxmrk = names(thisinterval[2]))
            }))          
        } else {
            data.frame()
        }
    }))
    mQTL_table$seqnames = sapply(mQTL_table$seqnames,function(x){
        paste(substr(x,1,3),as.roman(substr(x,4,5)),sep="")})
}

Loading required package: GenomicRanges
Loading required package: BiocGenerics

Attaching package: ‘BiocGenerics’

The following object is masked from ‘package:Matrix’:

    as.vector

The following objects are masked from ‘package:spam’:

    as.vector, cbind, rbind

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from ‘package:dplyr’:

    combine, intersect, setdiff, union

The following object is masked from ‘package:stats’:

    xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, as.vector, cbind, colnames,
    do.call, duplicated, eval, evalq, Filter, Find, get, intersect,
    is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax,
    pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rep.int,
 

In [11]:
head(eQTL_table)

Unnamed: 0,gene,seqnames,start,end,width,strand,maxlod,maxmrk
1,SUT433,chrI,1505,29279,27775,*,18.85243,mrk_79
8,SUT433,chrVIII,309963,469661,159699,*,4.47395,mrk_23688
3,CUT437,chrI,1505,44588,43084,*,7.199353,mrk_96
4,SUT434,chrII,427719,515157,87439,*,3.679556,mrk_4238
5,SUT435,chrI,32556,52164,19609,*,7.917256,mrk_99
6,CUT439,chrXIV,42770,780902,738133,*,2.760089,mrk_42223


In [12]:
mQTL_ranges = GRanges(seqnames = mQTL_table$seqnames, 
              ranges = IRanges(start = mQTL_table$start, end = mQTL_table$end, width = mQTL_table$width))
eQTL_ranges = GRanges(seqnames = eQTL_table$seqnames, 
              ranges = IRanges(start = eQTL_table$start, end = eQTL_table$end, width = eQTL_table$width))

In [30]:
f = "ge_me.rda"
if (file.exists(f)) {
    load(f)
} else {
    ge = trx_df %>% group_by(name, strain) %>% summarize(value = median(log2(value + 1)))
    me = endometabolite %>% group_by(metabolite, strain, time ) %>% summarize(value = median(value.log2))
    save(ge, me, file = f)
}
# only keep ORFs
orfgenes = unique(filter(trx_df, type=="ORFs")$name)
ge = filter(ge, name %in% orfgenes)

# First test how many mQTLs are also eQTLs

In [33]:
head(ge)

Unnamed: 0,name,strain,value
1,YAL067C,01C,0.0
2,YAL067C,02B,0.2443326
3,YAL067C,02C,0.0
4,YAL067C,02D,0.0
5,YAL067C,03A,0.0
6,YAL067C,03C,0.2292453


In [24]:
head(trx_df)

Unnamed: 0,strain,rep,name,type,chr,start,end,strand,value
X14C_biorep1,14C,biorep1,SUT432,SUTs,chr01,4875,5275,-,196.7108
X17C_biorep1,17C,biorep1,SUT432,SUTs,chr01,4875,5275,-,125.5987
X17D_biorep1,17D,biorep1,SUT432,SUTs,chr01,4875,5275,-,4.924066
X18C_biorep1,18C,biorep1,SUT432,SUTs,chr01,4875,5275,-,0.0
X18D_biorep1,18D,biorep1,SUT432,SUTs,chr01,4875,5275,-,0.0
X19C_biorep1,19C,biorep1,SUT432,SUTs,chr01,4875,5275,-,0.0


In [34]:
ge_mqtl = do.call(rbind,lapply(seq(1,dim(mQTL_table)[1]),function(i){
    print(i)
    metabolite = mQTL_table[i,"metabolite"]
    maxmrk = mQTL_table[i,"maxmrk"]
    genodf = data.frame(geno = geno[maxmrk,], strain = names(geno[maxmrk,]))
    o = merge(ge, genodf, by = "strain" ) %>% group_by(name) %>% do({
        cortest = cor.test(.$value, .$geno, method = "pearson", use="pair")
        data.frame(estimate = cortest$estimate, pval = cortest$p.value)
    })
    o$metabolite = metabolite
    o$maxmrk = maxmrk
    return(o)
}))
ge_mqtl$pval = p.adjust(ge_mqtl$pval)

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9


In [20]:
i = 1
metabolite = mQTL_table[i,"metabolite"]
maxmrk = mQTL_table[i,"maxmrk"]
genodf = data.frame(geno = geno[maxmrk,], strain = names(geno[maxmrk,]))
ge_mqtl = merge(ge, genodf, by = "strain" ) %>% group_by(name) %>% do({
    cortest = cor.test(.$value, .$geno, method = "pearson", use="pair")
    data.frame(estimate = cortest$estimate, pval = cortest$p.value)
})
ge_mqtl$metabolite = metabolite
ge_mqtl$maxmrk = maxmrk

In cor(x, y): the standard deviation is zero

In [46]:
head(ge_mqtl)

Unnamed: 0,name,estimate,pval,metabolite,maxmrk
1,YAL067C,0.1556179,1,AKG,mrk_37228
2,YAL054C,-0.06836896,1,AKG,mrk_37228
3,YAL049C,0.06459199,1,AKG,mrk_37228
4,YAL048C,-0.02167676,1,AKG,mrk_37228
5,YAL047C,0.005228904,1,AKG,mrk_37228
6,YAL046C,-0.2257095,1,AKG,mrk_37228
