# Tissue enrichment analysis
The file *gene_attribute_matrix.txt* was downloaded from [Harmonizome](http://amp.pharm.mssm.edu/Harmonizome/dataset/BioGPS+Mouse+Cell+Type+and+Tissue+Gene+Expression+Profiles) on 2018-06-14 and represents the differential expression of 15437 genes in 74 mouse cell types or tissues.  
The file *Mouse_Gene_Atlas* has been downloaded from [Enrichr](http://amp.pharm.mssm.edu/Enrichr/#stats) on the same day.  
The file *MouseToHumanSymbols.txt* was downloaded from [BioMart](http://www.ensembl.org/biomart/martview/) and maps mouse gene names to their human orthologues, according to Ensembl Release 92.

In [None]:
library(grDevices)

In [None]:
# Load the tab-separated gene/attribute matrix. Comment parsing is switched
# off (comment.char = "") so that "#" characters are read as ordinary data.
expressionMatrix <- read.table(
    file = "gene_attribute_matrix.txt",
    header = TRUE,
    sep = "\t",
    comment.char = ""
)

In [None]:
# Label the two leading columns and use the first column as row names
colnames(expressionMatrix)[1:2] <- c("GeneSym", "ProbeID")
rownames(expressionMatrix) <- expressionMatrix[, 1]
# Remove the first two rows and the first three columns (presumably extra
# header/annotation fields -- confirm against the file), then transpose.
# t() applied to a data frame already returns a matrix.
expressionMatrix <- t(expressionMatrix[-(1:2), -(1:3)])
# Coerce the cell values to numbers while keeping dimensions and dimnames
mode(expressionMatrix) <- "numeric"
# Number of columns of the transposed matrix; used further down as the size
# of the gene universe in the Fisher tests
n <- ncol(expressionMatrix)

In [None]:
# Mouse -> human gene symbol lookup table (tab-separated, with a header line)
mouseToHumans <- read.table("MouseToHumanSymbols.txt", header = TRUE, sep = "\t")
# Store the first two columns as plain character vectors rather than factors
mouseToHumans[1:2] <- lapply(mouseToHumans[1:2], as.character)

In [None]:
# Load differentially expressed genes in our study
# Gene sets from our study: flatten each CSV into a single vector.
# NOTE(review): read.csv() treats the first line as a header by default --
# confirm that both files start with a header line.
incSet <- unlist(read.csv(file = "RANKL_effect_increased.csv"))
decSet <- unlist(read.csv(file = "RANKL_effect_reduced.csv"))

In [None]:
# Translate one gene symbol through the global lookup table mouseToHumans.
#   x: gene symbol, compared with the Gene.name column of mouseToHumans.
# Returns the value found in the second column of the table when exactly one
# row matches, and '' when there is no match or more than one match.
convertToHuman <- function(x) {
    hits <- unlist(subset(mouseToHumans, Gene.name == x, select = 2))
    if (length(hits) != 1) {
        return('')
    }
    # Drop the name that unlist() attaches, leaving a bare string
    unname(hits)
}

In [None]:
# Translate every symbol of both gene sets with convertToHuman(); symbols
# without a unique match become ''.
# vapply() always yields a character vector, whereas sapply() would return
# an empty list for an empty gene set.
incSet <- vapply(incSet, convertToHuman, character(1))
decSet <- vapply(decSet, convertToHuman, character(1))

In [None]:
# Genes that are missing from the expression matrix could never be detected,
# so only symbols that are also column names of the matrix are kept
incSet <- intersect(incSet, colnames(expressionMatrix))
decSet <- intersect(decSet, colnames(expressionMatrix))

In [None]:
# NOTE(review): stores the first row of the matrix in the global x; looks like
# a leftover from interactive testing, since the function defined below takes
# its own x argument -- confirm before removing
x = expressionMatrix[1,]
# One-sided Fisher exact test for the over-representation of a gene set among
# the genes flagged in one tissue profile.
#   x:        named numeric vector; names are gene symbols and entries equal
#             to 1 mark the genes over-expressed in the tissue.
#   set:      character vector of gene symbols to test.
#   universe: total number of genes considered; defaults to the global n so
#             that existing two-argument calls behave as before.
# Returns a named numeric vector: pval (p-value of the "greater" test),
# "odds ratio" (estimate reported by fisher.test) and inter (number of genes
# shared by the tissue and the set).
compareTissueToInc <- function(x, set, universe = n){
    # Get gene names of overexpressed genes in the tissue
    tissueGenes <- names(x)[which(x == 1)]
    shared <- length(intersect(tissueGenes, set))
    tissueOnly <- length(setdiff(tissueGenes, set))
    setOnly <- length(setdiff(set, tissueGenes))
    # 2 x 2 contingency table, filled column by column; the last cell counts
    # the genes that are neither in the tissue signature nor in the set
    contTab <- matrix(
        c(shared, tissueOnly, setOnly, universe - shared - tissueOnly - setOnly),
        ncol = 2
    )
    ft <- fisher.test(contTab, alternative = "greater")
    c(pval = ft$p.value, ft$estimate, inter = shared)
}

In [None]:
# Test the up-regulated set against every row (tissue) of the matrix.
# apply() binds the per-tissue results as columns: row 1 = p-value,
# row 2 = odds ratio, row 3 = size of the intersection.
incEnrich <- apply(expressionMatrix, 1, function(x) compareTissueToInc(x, incSet))
# Bonferroni correction over the number of tissues, capped at 1, then -log10
incEnrich[1, ] <- -log10(pmin(incEnrich[1, ] * ncol(incEnrich), 1))
par(bg = "white")
# One colour per possible intersection size, zero included. Indexing the
# palette with 0 silently drops that element, which would shorten the colour
# vector and shift the colours of the remaining points; hence the + 1 offset.
colpal <- colorRampPalette(c("#000000", "#FF0000"))(max(incEnrich[3, ]) + 1)
plot(incEnrich[2, ], incEnrich[1, ], col = colpal[incEnrich[3, ] + 1], pch = 16,
    xlab = "Odd-ratio", ylab = "-log10(corrected p-value)")
# Keep tissues with a corrected p-value below 0.1; drop = FALSE preserves the
# matrix shape when a single tissue passes the threshold
incEnrich <- incEnrich[, incEnrich[1, ] > 1, drop = FALSE]
# text() fails on zero labels, so only annotate when something is left
if (ncol(incEnrich) > 0) {
    text(incEnrich[2, ], incEnrich[1, ], labels = colnames(incEnrich), pos = c(1, 2, 2, 2))
}

In [None]:
# Test the down-regulated set against every row (tissue) of the matrix.
# apply() binds the per-tissue results as columns: row 1 = p-value,
# row 2 = odds ratio, row 3 = size of the intersection.
decEnrich <- apply(expressionMatrix, 1, function(x) compareTissueToInc(x, decSet))
# Bonferroni correction over the number of tissues, capped at 1, then -log10
decEnrich[1, ] <- -log10(pmin(decEnrich[1, ] * ncol(decEnrich), 1))
par(bg = "white")
# One colour per possible intersection size, zero included. Indexing the
# palette with 0 silently drops that element, which would shorten the colour
# vector and shift the colours of the remaining points; hence the + 1 offset.
colpal <- colorRampPalette(c("#000000", "#FF0000"))(max(decEnrich[3, ]) + 1)
plot(decEnrich[2, ], decEnrich[1, ], col = colpal[decEnrich[3, ] + 1], pch = 16,
    xlab = "Odd-ratio", ylab = "-log10(corrected p-value)")
# Keep tissues with a corrected p-value below 0.1; drop = FALSE preserves the
# matrix shape when a single tissue passes the threshold
decEnrich <- decEnrich[, decEnrich[1, ] > 1, drop = FALSE]
# text() fails on zero labels, so only annotate when something is left
if (ncol(decEnrich) > 0) {
    text(decEnrich[2, ], decEnrich[1, ], labels = colnames(decEnrich), pos = 2)
}

In [None]:
# Position(s) of the row named "osteoclasts" in the expression matrix
which(rownames(expressionMatrix) == "osteoclasts")

In [None]:
# Step-by-step version of the Fisher test for one tissue and the up-regulated set.
# NOTE(review): row 34 is presumably the "osteoclasts" row located by the
# lookup cell just before this one -- confirm
x <- expressionMatrix[34, ]
# Genes flagged with 1 in this tissue profile
y <- names(x)[which(x == 1)]
set <- incSet
# Genes shared by the tissue signature and the set
sort(intersect(y, set))
# 2 x 2 contingency table, filled column by column; the last cell is the
# remainder of the n genes
contTab <- matrix(c(length(intersect(y, set)), length(setdiff(y, set)), length(setdiff(set, y)), 0), ncol = 2)
contTab[2, 2] <- n - sum(contTab)
ft <- fisher.test(contTab, alternative = "greater")
# Bare expression instead of return(): outside a function, return() raises
# "no function to return from" in a plain R session
c(pval = ft$p.value, ft$estimate, inter = length(intersect(y, set)))

In [None]:
# Same step-by-step Fisher test for the down-regulated set; reuses the gene
# vector y left in the workspace by the preceding cell
set <- decSet
# Genes shared by the tissue signature and the set
sort(intersect(y, set))
# 2 x 2 contingency table, filled column by column; the last cell is the
# remainder of the n genes
contTab <- matrix(c(length(intersect(y, set)), length(setdiff(y, set)), length(setdiff(set, y)), 0), ncol = 2)
contTab[2, 2] <- n - sum(contTab)
ft <- fisher.test(contTab, alternative = "greater")
# Bare expression instead of return(): outside a function, return() raises
# "no function to return from" in a plain R session
c(pval = ft$p.value, ft$estimate, inter = length(intersect(y, set)))

In [None]:
# Inspect a 3 x 3 window of the matrix: rows 33 to 35, columns 9034 to 9036
expressionMatrix[33:35,9034:9036]

In [None]:
#NB: identifiers are capitalized outdated mouse gene names
# Read the library as one string per line. Given a file name, readLines()
# opens the connection and closes it again itself, including when reading
# fails, so no connection handle has to be managed by hand.
rawMGA <- readLines("Mouse_Gene_Atlas")

In [None]:
# Return the first tab-separated field of a line.
#   x: a single string; the field in front of the first tab is returned
#      (used below as the name of the entry built from that line).
extractTissue <- function(x) {
    fields <- strsplit(x, "\t")[[1]]
    fields[1]
}
# Extract the gene identifiers from one line.
#   x: a single tab-separated string. The first two fields are skipped; each
#      remaining field is reduced to the text in front of its first comma.
# Returns a character vector of identifiers. Empty fields (consecutive tabs)
# are ignored instead of raising "subscript out of bounds", and a line with
# no gene field gives character(0) rather than an empty list.
extractGenes <- function(x){
    fields <- strsplit(x, "\t", fixed = TRUE)[[1]][-c(1:2)]
    fields <- fields[nzchar(fields)]
    # Strip everything from the first comma onwards
    sub(",.*$", "", fields)
}

In [None]:
# Build one list entry per line of the library: the value is the gene vector
# of the line, the name is the first field of the line
MGA <- setNames(
    lapply(rawMGA, extractGenes),
    lapply(rawMGA, extractTissue)
)

In [None]:
# One-sided Fisher exact test for the over-representation of a gene set among
# the genes flagged in one tissue profile.
#   x:        named numeric vector; names are gene symbols and entries equal
#             to 1 mark the genes over-expressed in the tissue.
#   set:      character vector of gene symbols to test.
#   universe: total number of genes considered; defaults to the global n so
#             that existing two-argument calls behave as before.
# Returns a named numeric vector: pval (p-value of the "greater" test),
# "odds ratio" (estimate reported by fisher.test) and inter (number of genes
# shared by the tissue and the set).
compareTissueToInc <- function(x, set, universe = n){
    # Get gene names of overexpressed genes in the tissue
    tissueGenes <- names(x)[which(x == 1)]
    shared <- length(intersect(tissueGenes, set))
    tissueOnly <- length(setdiff(tissueGenes, set))
    setOnly <- length(setdiff(set, tissueGenes))
    # 2 x 2 contingency table, filled column by column; the last cell counts
    # the genes that are neither in the tissue signature nor in the set
    contTab <- matrix(
        c(shared, tissueOnly, setOnly, universe - shared - tissueOnly - setOnly),
        ncol = 2
    )
    ft <- fisher.test(contTab, alternative = "greater")
    c(pval = ft$p.value, ft$estimate, inter = shared)
}

In [None]:
# Sorted gene list stored under "osteoclasts" in MGA.
# NOTE(review): `$` on a list partially matches names, so a longer name that
# starts with "osteoclasts" would also be picked up if no exact match exists
sort(MGA$osteoclasts)