# MS proteomics data analysis

In [None]:
# Install limma through the Bioconductor biocLite installer script, then attach
# the packages used by the cells below (limma, heatmaply, RColorBrewer).
# NOTE(review): sourcing biocLite.R needs network access on every run, and newer
# Bioconductor releases have replaced biocLite() with BiocManager::install() —
# confirm against the R/Bioconductor version this notebook targets.
source("https://bioconductor.org/biocLite.R")
biocLite()
biocLite(c("limma"))
library(limma)
library(heatmaply)
library(RColorBrewer)

In [None]:
# Load the protein-group table (tab-separated, first line holds column names).
# `header = TRUE` is spelled out in full: `head = T` relied on partial argument
# matching and on `T`, which is an ordinary variable that can be reassigned.
MS <- read.table(
  "20180202_OsteoclastProteomes_Protein_groups_filtered_imputed.txt",
  sep = "\t",
  header = TRUE
)

In [None]:
# Dimensions of the first 20 columns of MS (the columns plotted in the cells below)
dim(MS[, seq_len(20)])

In [None]:
# Unfiltered MDS plot of the first 20 columns of MS, written to mdsLogMS.pdf
# Five "Set2" colours; each one is applied to four consecutive columns
colpal <- brewer.pal(n = 5, name = "Set2")
pdf("mdsLogMS.pdf")
par(family = "sans")
plotMDS(MS[, 1:20], col = rep(colpal, each = 4))
dev.off()

In [None]:
# Filter for proteins with high abundance variation:
# row-wise coefficient of variation (sd / mean) over the first 20 columns
coefVarMS <- apply(MS[, 1:20], MARGIN = 1, FUN = function(rowValues) {
  sd(rowValues) / mean(rowValues)
})
# Size of the subset above the 0.02 cut-off
# (author's note: keeps the 22% with the highest coefficient of variation)
dim(MS[coefVarMS > 0.02, 1:20])

In [None]:
# Heatmap of the high-variation proteins, with the elapsed wall-clock time
# of the heatmaply() call printed afterwards
a <- Sys.time()
heatmaply(MS[coefVarMS > 0.02, 1:20])
b <- Sys.time()
print(difftime(b, a))

In [None]:
# Heatmap of the pairwise correlations between the first 20 columns,
# computed on the high-variation proteins only
heatmaply(cor(x = MS[coefVarMS > 0.02, 1:20]))

In [None]:
# Filtered MDS plot (rows with coefficient of variation > 0.02),
# written to mdsLogMSfilteredCoefVar.pdf
pdf("mdsLogMSfilteredCoefVar.pdf")
par(family = "sans")
plotMDS(
  MS[coefVarMS > 0.02, 1:20],
  col = colpal[rep(seq_len(5), each = 4)]
)
dev.off()

In [None]:
# Load transformed data with t-test results (tab-separated, quoting disabled).
# `header = TRUE` is spelled out in full: `head = T` relied on partial argument
# matching and on the reassignable shorthand `T`.
protFiltered <- read.table(
  "Perseus_run/ProteinsTTestFiltered.txt",
  sep = "\t",
  header = TRUE,
  quote = ""
)

In [None]:
# Rename the second column; it is used as the y variable of the volcano plot
names(protFiltered)[2] <- "LogPvalue"
# Color for significant abundance changes: one colour per sign of Difference
protFiltered$color <- ifelse(protFiltered$Difference > 0, "#4A91C4", "#F09F4E")
# Everything not flagged "+" in Significant is drawn in grey. The column is
# addressed by name: the previous hard-coded index 99 only pointed at `color`
# when the input table had exactly 98 columns. `%in%` also treats a missing
# flag as "not significant" instead of failing on an NA subscript.
protFiltered$color[!(protFiltered$Significant %in% "+")] <- "grey"
protFiltered$Gene.names <- as.character(protFiltered$Gene.names)
# Keep only the first gene name when several are provided
# NOTE(review): column 7 is presumably Gene.names — confirm against the input file
protFiltered[, 7] <- sub(";.*", "", protFiltered[, 7])

In [None]:
# Threshold curve drawn on top of the volcano plot (tab-separated, quoting disabled).
# `header = TRUE` is spelled out in full: `head = T` relied on partial argument
# matching and on the reassignable shorthand `T`.
protFilterCurve <- read.table(
  "Perseus_run/ProteinsTTestFilteredCurve.txt",
  sep = "\t",
  header = TRUE,
  quote = ""
)

In [None]:
# Volcano plot of the t-test results. Uncomment the pdf()/dev.off() pair to
# write the figure to volcanoPlotFiltering.pdf instead of the active device.
#pdf("volcanoPlotFiltering.pdf")
par(family = "sans")
# Row indices of the proteins to annotate; the two groups differ only in the
# horizontal justification (adj) used for their labels
ptsToLabelRight <- c(
  139, 441, 24, 38, 268, 147, 112, 331, 132, 307, 284,
  419, 181, 301, 184, 306, 325, 291, 10, 221, 3
)
ptsToLabelLeft <- c(222, 290, 208, 258, 490, 151, 286, 507, 451, 282, 385)
# Points, coloured by the per-row `color` column
with(
  protFiltered,
  plot(
    LogPvalue ~ Difference,
    pch = 20,
    col = color,
    ylab = "log(p-value)",
    xlab = "mean(RANKL+Arg) - mean(RANKL+Arg+recArg1)"
  )
)
# Gene-name labels for the two annotated groups
with(
  protFiltered[ptsToLabelLeft, ],
  text(LogPvalue ~ Difference, labels = Gene.names, cex = 0.8, adj = c(-0.2, 1))
)
with(
  protFiltered[ptsToLabelRight, ],
  text(LogPvalue ~ Difference, labels = Gene.names, cex = 0.8, adj = c(1.2, 1))
)
# Dashed threshold curve
lines(protFilterCurve, lty = 2)
#dev.off()

In [None]:
# Used to find the list of proteins to annotate
# ptsToLabel = which(protFiltered$Significant == "+")
# with(protFiltered[ptsToLabelTest,], text(LogPvalue ~ Difference, labels = Gene.names, col = 2, cex = 0.8, adj = c(-.2,1)))
# ptsToLabelTest = sample(ptsToLabel, 30)
# print(ptsToLabelTest)
# protFiltered[ptsToLabelTest,7]

In [None]:
# Column-7 values of the rows coloured "#4A91C4" (listDown) and "#F09F4E" (listUp)
# NOTE(review): column 7 is presumably the gene name — confirm against the input file
listDown <- protFiltered[[7]][protFiltered$color == "#4A91C4"]
listUp <- protFiltered[[7]][protFiltered$color == "#F09F4E"]

In [None]:
# Print listDown, one entry per line
writeLines(as.character(listDown))

In [None]:
# Print listUp, one entry per line
writeLines(as.character(listUp))

In [None]:
# Print column 7 of the (up to) 40 rows with the largest LogPvalue, one per line.
# `na.last = NA` drops rows with a missing LogPvalue, which rev(order()) would
# otherwise list first. head() stops at the number of rows available, whereas
# `[1:40]` produced NA indices (printed as "NA") for tables shorter than 40 rows.
writeLines(as.character(
  protFiltered[
    head(rev(order(protFiltered$LogPvalue, na.last = NA)), 40),
    7
  ]
))

## Export for OmicsIntegrator

In [None]:
# Work on a copy so protFiltered keeps its signed Difference values
pf <- protFiltered
pf[["Difference"]] <- abs(pf[["Difference"]])
# Write the rows flagged "+" as a tab-separated file without header, row names
# or quoting.
# NOTE(review): columns 13 and 3 are selected by position — confirm they are the
# identifier and the (now absolute) Difference expected by OmicsIntegrator
write.table(
  pf[pf$Significant == "+", c(13, 3)],
  file = "OmicsIntegratorRun/recArg1_prot.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

In [None]:
# Report the R session details (R version and loaded packages) for this run
sessionInfo()