# eQTL mQTL overlap

How many eQTLs are also mQTLs, and vice versa?

In [5]:
# Use the user-level package library located under the R 3.2 directory.
.libPaths("~/R/x86_64-redhat-linux-gnu-library/3.2/")
# config opts and libraries
# Plot size options (repr.*); presumably read by the notebook's plot renderer.
options(repr.plot.width = 6)
options(repr.plot.height = 5)
library(ggplot2);
# plyr is attached before dplyr, so for names both packages export
# (e.g. summarize) the dplyr version is the one found first.
library(plyr);
library(dplyr);
library(reshape2);
library(LSD);
library(pheatmap);
library(parallel);
# Default number of cores used by parallel's mc* functions.
options(mc.cores = 24);
library(stringr);
library(RColorBrewer);
library(qtl);
library(funqtl);
# NOTE(review): GRanges()/IRanges() are called further down, but no
# library(GenomicRanges) call is visible in this cell - confirm it is attached.

In [6]:
# load mQTLs
# The .rda file is expected to provide `mQTLs_funqtl_2014`, a list whose
# names are used as metabolite identifiers further down.
load("/g/steinmetz/brooks/genphen//metabolome/qtls/mQTLs_comball_funqtl_2014.rda")
# Primary class of every entry. vapply() guarantees a character vector even
# when an entry carries more than one class (sapply() would return a list).
mQTLs_funqtl_2014_class = vapply(mQTLs_funqtl_2014,
                                 function(x) class(x)[1],
                                 character(1))
# Drop entries that are "try-error" objects. inherits() also catches the case
# where "try-error" is not the only class on the object.
mQTLs_funqtl_2014 = mQTLs_funqtl_2014[
    !vapply(mQTLs_funqtl_2014, inherits, logical(1), what = "try-error")
]



In [7]:
# eQTL results stored as .rda.
# NOTE(review): presumably provides `eQTL_table`, which is used below - confirm.
load(file = "/g/steinmetz/brooks/genphen/transcriptome/qtl/eQTL_08032016.rda")

In [8]:
# Transcript data; `trx_df` is queried below by name / strain / value.
# NOTE(review): presumably this file is what provides `trx_df` - confirm.
load(file = "/g/steinmetz/brooks/genphen/transcriptome/data/trx_df.rda")

In [146]:
# Genotype / marker data for the S288c x YJM789 cross.
# NOTE(review): presumably provides `geno`, which is indexed by marker below.
genotype_f = "/g/steinmetz/brooks/yeast/genomes/S288CxYJM789/genotypes_S288c_R64.rda"
load(file = genotype_f)

In [147]:
# Endometabolome measurements (.rda); `endometabolite` is used right after
# the load.
endo_f = "/g/steinmetz/project/GenPhen/data/endometabolome/data/endometabolite_full_12102015.rda"
load(file = endo_f)

# keep only the rows recorded in the "relative" time format
endometabolite = endometabolite %>% filter(time_format == "relative")

#head(endometabolite)

In [148]:
# Expression data (`trx_df`). This is the same file that an earlier cell
# already loads; loading it again overwrites the objects it contains.
load(file = "/g/steinmetz/brooks/genphen/transcriptome/data/trx_df.rda")

In [166]:
# first six rows of the filtered metabolite table
head(endometabolite, n = 6)

Unnamed: 0,strain,metabolite,replicate,time_format,time,value,value.log2,relative.log2,derivative.log2,cellconc,biovol,singlecellvol,peakarea,batch
1,01B,AKG,1,relative,1,64.62115,6.036089,1.0,,,,,,dM1
2,01B,AKG,1,relative,2,75.42941,6.256056,1.036442,0.219967,,,,,dM1
3,01B,AKG,1,relative,3,46.9538,5.583573,0.9250316,-0.6724827,,,,,dM1
4,01B,AKG,1,relative,4,57.4146,5.868257,0.9721952,0.2846837,,,,,dM1
5,01B,AKG,2,relative,1,65.08506,6.046252,1.0,,,,,,dM1
6,01B,AKG,2,relative,2,61.94676,5.97606,0.9883908,-0.07019202,,,,,dM1


In [122]:
# Build `mQTL_table`: one row per mQTL interval, across all metabolites.
# Columns: metabolite, the interval span as produced by
# as.data.frame(range(<interval>)), maxlod (the "mlod" entry of `qtls_alt`
# for the interval's second marker) and maxmrk (that marker's name).
# The table is only built when no `mQTL_table` exists in the session yet.
if (!exists("mQTL_table")) {
    mQTL_table = do.call(rbind, lapply(names(mQTLs_funqtl_2014), function(m) {
        fit = mQTLs_funqtl_2014[[m]]
        interval = fit$qtl_intervals
        if (length(interval) == 0) {
            # no interval for this metabolite; rbind() drops empty data frames
            return(data.frame())
        }
        # seq_along() replaces seq(1:length(...))
        do.call(rbind, lapply(seq_along(interval), function(z) {
            thisinterval = interval[[z]]
            # name of the second element of the interval
            # NOTE(review): presumably the peak marker (lower/peak/upper) - confirm
            peakmrk = names(thisinterval[2])
            data.frame(metabolite = m,
                       as.data.frame(range(thisinterval)),
                       maxlod = fit$qtls_alt[peakmrk, "mlod"],
                       maxmrk = peakmrk)
        }))
    }))
    # Rewrite chromosome labels: keep the first three characters and turn
    # characters 4-5 into a roman numeral (e.g. "chr01" -> "chrI").
    # Done element by element on purpose, so each label is converted on its
    # own; vapply() keeps the result a character vector even for an empty table.
    mQTL_table$seqnames = vapply(mQTL_table$seqnames, function(x) {
        paste0(substr(x, 1, 3), as.roman(substr(x, 4, 5)))
    }, character(1))
}

In [204]:
# first six rows of the eQTL interval table
head(eQTL_table, n = 6)

Unnamed: 0,gene,seqnames,start,end,width,strand,maxlod,maxmrk
1,SUT433,chrI,1505,29279,27775,*,18.85243,mrk_79
8,SUT433,chrVIII,309963,469661,159699,*,4.47395,mrk_23688
3,CUT437,chrI,1505,44588,43084,*,7.199353,mrk_96
4,SUT434,chrII,427719,515157,87439,*,3.679556,mrk_4238
5,SUT435,chrI,32556,52164,19609,*,7.917256,mrk_99
6,CUT439,chrXIV,42770,780902,738133,*,2.760089,mrk_42223


In [135]:
# Turn a QTL interval table (columns seqnames, start, end, width) into a
# GRanges object. start, end and width are all handed to IRanges(), exactly
# as stored in the table.
qtl_table_to_granges = function(qtl_table) {
    GRanges(seqnames = qtl_table$seqnames,
            ranges = IRanges(start = qtl_table$start,
                             end = qtl_table$end,
                             width = qtl_table$width))
}

mQTL_ranges = qtl_table_to_granges(mQTL_table)
eQTL_ranges = qtl_table_to_granges(eQTL_table)

In [144]:
# number of mQTL rows whose maxmrk also appears as an eQTL maxmrk
length(which(mQTL_table$maxmrk %in% eQTL_table$maxmrk))

In [140]:
# for every mQTL interval, the number of eQTL intervals it overlaps
countOverlaps(query = mQTL_ranges, subject = eQTL_ranges)

In [212]:
# spot check: log2(value + 1) of SUT432 in strain 02B
log2(1 + (trx_df %>% filter(name == "SUT432", strain == "02B"))$value)

In [57]:
# Scratch lookup of the "mlod" entry for marker `highmrk` of metabolite `m`.
# NOTE(review): `m` and `highmrk` are not assigned at top level before this
# cell (they are local names inside the mQTL_table construction); this only
# runs if they happen to exist in the interactive session.
mQTLs_funqtl_2014[[m]]$qtls_alt[highmrk,"mlod"]

Unnamed: 0,chr,pos,slod,mlod
mrk_37228,13,1065,3.960293,4.736386


In [207]:
# Median summaries, cached on disk:
#   ge - median of log2(value + 1) per transcript name and strain
#   me - median of value.log2 per metabolite, strain and time
# NOTE: `f` is relative to the current working directory, and an existing
# cache file is loaded as-is. Delete it after changing the summaries below,
# otherwise stale `ge` / `me` objects are silently reused.
f = "ge_me.rda"
if (file.exists(f)) {
    load(f)
} else {
    # dplyr:: prefixes: plyr is attached as well and also exports summarize(),
    # so the result must not depend on the order the packages were loaded in.
    # ungroup() returns plain (ungrouped) tables, so later verbs on `ge` / `me`
    # do not silently operate per group.
    ge = trx_df %>%
        dplyr::group_by(name, strain) %>%
        dplyr::summarise(value = median(log2(value + 1))) %>%
        dplyr::ungroup()
    me = endometabolite %>%
        dplyr::group_by(metabolite, strain, time) %>%
        dplyr::summarise(value = median(value.log2)) %>%
        dplyr::ungroup()
    save(ge, me, file = f)
}


In [208]:
# first six rows of the per-strain expression medians
head(ge, n = 6)

Unnamed: 0,name,strain,value
1,SUT432,01C,0.464358
2,SUT432,02B,-inf
3,SUT432,02C,-inf
4,SUT432,02D,2.077627
5,SUT432,03A,-inf
6,SUT432,03C,-inf


In [205]:
# Pearson correlation between the genotype at one marker and the median
# level of one metabolite (HSE) at one time point (time == 2).
m = "mrk_4238"

# genotype at marker m, one row per strain (strain taken from the names of
# the selected `geno` row)
genodf = data.frame(geno = geno[m,], strain = names(geno[m,]))

# attach the genotype to the metabolite medians, then keep HSE at time 2
me_tmp = merge(me, genodf, by = "strain" ) %>% filter(metabolite=="HSE",time==2)
#x = filter(ge, )
#cor(ge,geno[m,],method ="pearson")
# cor.test() has no `use` argument (that one belongs to cor()); the former
# use="pair" was silently swallowed by `...`. cor.test() already restricts
# itself to complete pairs, so strains with a missing genotype are dropped.
cor.test(me_tmp$value, me_tmp$geno, method = "pearson")

Unnamed: 0,strain,metabolite,time,value,geno
1,01B,HSE,2,11.64172,2.0
2,01C,HSE,2,9.07838,1.0
3,01D,HSE,2,8.944617,
4,02C,HSE,2,10.24209,1.0
5,02D,HSE,2,10.13415,2.0
6,03A,HSE,2,9.640158,2.0



	Pearson's product-moment correlation

data:  me_tmp$value and me_tmp$geno
t = 0.089584, df = 80, p-value = 0.9288
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.2074427  0.2265301
sample estimates:
       cor 
0.01001531 
