In [None]:
# plotting setup: attach ggplot2 and RColorBrewer, then make the
# black-and-white theme the default for every plot drawn afterwards
library(ggplot2)
library(RColorBrewer)
ggplot2::theme_set(ggplot2::theme_bw())

In [None]:
# preliminaries: load the inputs, normalize the expression matrix per cell,
# run a PCA on the cells and split them by the sign of PC1

# load the expression matrix
# NOTE(review): presumably genes are in rows and cells in columns (rows are
# filtered as "genes" below and columns are indexed by cell name) -- confirm.
data.dir <- "/home/rcortini/work/CRG/projects/sc_hiv/data"
matrix.fname <- file.path(data.dir, "matrices", "exprMatrix.tsv")
exprMatrix <- read.table(matrix.fname, header = TRUE, row.names = 1,
                         sep = "\t", check.names = FALSE)

# load the sample sheet (row names are taken from its first column)
sample.sheet.fname <- file.path(data.dir, "metadata", "sampleSheet.tsv")
sampleSheet <- read.delim(sample.sheet.fname, header = TRUE, row.names = 1)

# load gene annotations file and keep only the genes present in the matrix
gene.annotations <- file.path(data.dir, "matrices", "gene_annotations.tsv")
gene.data <- read.delim(gene.annotations, header = TRUE, sep = "\t",
                        row.names = 1, stringsAsFactors = FALSE)
gene.data <- gene.data[rownames(gene.data) %in% rownames(exprMatrix), ,
                       drop = FALSE]

# remove genes with (almost) no expression: keep rows whose total is > 1
norm.exprMatrix <- as.matrix(exprMatrix[rowSums(exprMatrix) > 1, ])

# normalize by column sum: divide every column by its total over the kept
# rows, so that each column of norm.exprMatrix sums to 1
total <- colSums(norm.exprMatrix)
norm.exprMatrix <- sweep(norm.exprMatrix, 2, total, "/")

# do the PCA with one observation per column of the matrix (hence the
# transpose); `scale.` is spelled out instead of relying on partial matching
exprMatrix.pca <- prcomp(t(norm.exprMatrix), scale. = TRUE)
pca <- as.data.frame(exprMatrix.pca$x)

# define alive cells as those with a negative PC1 score
# NOTE(review): the sign of a principal component is arbitrary, so this
# orientation should be checked whenever the input data or R version changes.
alive.cells <- rownames(pca)[pca$PC1 < 0]

## Is PUS10 highly expressed in dead cells?

In [None]:
# cells with a positive PC1 score are treated as dead (the alive cells were
# selected above with PC1 < 0)
dead.cells <- row.names(pca)[pca[["PC1"]] > 0]
# raw expression values restricted to the dead cells
dead <- exprMatrix[, dead.cells]

In [None]:
# scatter plot of PUS10 against HIV ("FILIONG01" row) in the dead cells only
options(repr.plot.width = 3.5, repr.plot.height = 4)
PUS10 <- "ENSG00000162927.13"
# not used by the plot below; it is reassigned after the dead-cell filtering
SAHA.treated <- rownames(sampleSheet)[sampleSheet$label == "J-LatA2+SAHA"]
plot(
    x = t(exprMatrix[PUS10, dead.cells]),
    y = t(exprMatrix["FILIONG01", dead.cells]),
    xlab = "PUS10",
    ylab = "HIV"
)

The answer is no.

## Correlation of DE genes with HIV

One question that was asked is whether the mysterious differentially expressed genes from the SAHA responders-vs-nonresponders test are positively or negatively correlated with HIV.

In [None]:
# from here on work with the alive cells only: drop every other cell from
# the sample sheet (rows) and from the expression matrix (columns)
sampleSheet <- sampleSheet[rownames(sampleSheet) %in% alive.cells, ,
                           drop = FALSE]
exprMatrix <- exprMatrix[, alive.cells]

In [None]:
# restore the saved differential-expression results into the workspace
# (presumably these define de.genes.SAHAresponders and de.genes.PMAresponders,
# which are used below -- confirm against the files)
# NOTE(review): these paths are relative to the working directory, unlike the
# absolute data.dir used for the other inputs.
load(file = "../data/DEA/de.genes.SAHAresponders.Rda")
load(file = "../data/DEA/de.genes.PMAresponders.Rda")

In [None]:
# define a data frame that will allow us to plot the genes vs HIV
SAHA.treated <- rownames(sampleSheet)[sampleSheet$label == "J-LatA2+SAHA"]
SAHA <- as.data.frame(t(exprMatrix[de.genes.SAHAresponders$id, SAHA.treated]))
SAHA$hiv <- t(exprMatrix["FILIONG01", SAHA.treated])

In [None]:
# plot everything
options(repr.plot.width = 3, repr.plot.height = 3)
for (i in 1:dim(de.genes.SAHAresponders)[1]) {
    id <- de.genes.SAHAresponders[i, "id"]
    gg <- ggplot(SAHA, aes_string(x = id, y = "hiv")) +
       geom_point(col="darkgreen") + 
       labs(main = "Grey module",
            x = de.genes.SAHAresponders[i, "symbol"],
            y = "HIV")
    print(gg)
}

In [None]:
# define a data frame that will allow us to plot the genes vs HIV
PMA.treated <- rownames(sampleSheet)[sampleSheet$label == "J-LatA2+PMA"]
PMA <- as.data.frame(t(exprMatrix[de.genes.PMAresponders$id, PMA.treated]))
PMA$hiv <- t(exprMatrix["FILIONG01", PMA.treated])

In [None]:
# plot everything: one scatter plot of HIV against each differentially
# expressed gene, across the PMA-treated cells
options(repr.plot.width = 3, repr.plot.height = 3)
# seq_len() yields an empty loop when the table has no rows (1:0 would not)
for (i in seq_len(nrow(de.genes.PMAresponders))) {
    id <- as.character(de.genes.PMAresponders[i, "id"])
    symbol <- as.character(de.genes.PMAresponders[i, "symbol"])
    # .data[[id]] looks the column up by its string name; it replaces the
    # deprecated aes_string() and does not parse the gene id as R code
    gg <- ggplot(PMA, aes(x = .data[[id]], y = hiv)) +
        geom_point(col = "darkgreen") +
        labs(x = symbol, y = "HIV")
    print(gg)
}