In [None]:
library(ggplot2)
library(ggrepel)
library(ggpubr)
library(DESeq2)
library(glmnet)
library(fgsea)
library(GSVA)
library(openxlsx)
library(doMC)
library(dplyr)
registerDoMC(20)

# Define function to map genes between species

In [None]:
# Map the rows (genes) of an expression table to homologous genes of another
# species using a two-column gene-name mapping.
#
# Arguments:
#   x                Matrix or data frame with source-species gene names as row
#                    names and one column per sample.
#   ensembl.mapping  Table holding a "Gene.name" column (source gene names) and
#                    a "<Species>.gene.name" column (homolog names), e.g.
#                    "Mouse.gene.name" when species = "mouse".
#   species          Single string naming the target species; it is title-cased
#                    to build the homolog column name.
#
# Returns a data frame with one row per retained homolog (row names are the
# homolog gene names, in factor-level order) and the same columns as `x`.
#
# Mapping pairs are dropped when either name is empty/NA or when the source
# gene is not a row of `x`. Each homolog name is kept only once. A source gene
# listed with several homologs contributes its values to each of them.
computeSpeciesAssay <- function(x, ensembl.mapping = NULL, species = NULL){

    # Fail early with a readable message instead of an obscure indexing error
    if (is.null(ensembl.mapping)) {
        stop("`ensembl.mapping` must be supplied", call. = FALSE)
    }
    if (!is.character(species) || length(species) != 1) {
        stop("`species` must be a single character string, e.g. \"mouse\"",
             call. = FALSE)
    }

    from <- "Gene.name"
    to <- paste0(stringr::str_to_title(species), ".gene.name")

    missing.cols <- setdiff(c(from, to), colnames(ensembl.mapping))
    if (length(missing.cols) > 0) {
        stop("`ensembl.mapping` lacks column(s): ",
             paste(missing.cols, collapse = ", "), call. = FALSE)
    }

    # Process mapping data: keep complete pairs whose source gene is present in x
    source.genes <- as.character(ensembl.mapping[, from])
    target.genes <- as.character(ensembl.mapping[, to])
    keep <- !is.na(source.genes) & !is.na(target.genes) &
        source.genes != "" & target.genes != "" &
        source.genes %in% rownames(x)
    source.genes <- source.genes[keep]
    target.genes <- target.genes[keep]

    # Order pairs by homolog name length (ties keep their original order), then
    # keep the first pair seen for every homolog name.
    # NOTE(review): the original comment described this as mapping each gene to
    # the homolog with the shortest name, but the de-duplication acts on the
    # homolog column, so it resolves several source genes sharing one homolog -
    # confirm that this is the intended rule.
    by.length <- order(nchar(target.genes))
    source.genes <- source.genes[by.length]
    target.genes <- target.genes[by.length]
    first.hit <- !duplicated(target.genes)
    source.genes <- source.genes[first.hit]
    target.genes <- target.genes[first.hit]

    # Indicator matrix (homologs x pairs) times the selected rows of x;
    # drop = FALSE keeps the sample name when x has a single column
    new_mat <- Matrix::fac2sparse(target.genes) %*%
        as.matrix(x[source.genes, , drop = FALSE])

    as.data.frame(as.matrix(new_mat))
}

# Load bulk transcriptomics data and map rat genes to mouse orthologues

In [None]:
# Rat-to-mouse gene name mapping table
rat.to.mouse <- read.csv(
  "/projects/mludwig/DVC/data/gene_info/rat_to_mouse.txt"
)

# Mouse: read the count table (genes as row names) and the sample metadata,
# then remove the listed outlier samples from both
outliers.mouse <- c("mouse11", "mouse25", "mouse27", "mouse33")
counts.mouse <- read.csv(
  "/projects/mludwig/DVC/data/bulk_rna-seq/processed/counts_mouse.csv",
  row.names = 1
)
meta.mouse <- read.csv(
  "/projects/mludwig/DVC/data/bulk_rna-seq/processed/meta_mouse.csv"
)
counts.mouse <- counts.mouse[, !is.element(colnames(counts.mouse), outliers.mouse)]
meta.mouse <- meta.mouse[!is.element(meta.mouse$sample, outliers.mouse), ]

# Rat: same steps; the counts still carry rat gene names at this point
outliers.rat <- c("rat9", "rat65")
counts.rat.unmapped <- read.csv(
  "/projects/mludwig/DVC/data/bulk_rna-seq/processed/counts_rat.csv",
  row.names = 1
)
meta.rat <- read.csv(
  "/projects/mludwig/DVC/data/bulk_rna-seq/processed/meta_rat.csv"
)
counts.rat.unmapped <- counts.rat.unmapped[
  , !is.element(colnames(counts.rat.unmapped), outliers.rat)
]
meta.rat <- meta.rat[!is.element(meta.rat$sample, outliers.rat), ]

# Translate rat gene names to their mouse orthologues
counts.rat <- computeSpeciesAssay(
  counts.rat.unmapped,
  ensembl.mapping = rat.to.mouse,
  species = "mouse"
)

# Restrict both count tables to the genes they share, in the same row order,
# for downstream analysis
common.genes <- intersect(rownames(counts.mouse), rownames(counts.rat))
counts.rat <- counts.rat[common.genes, ]
counts.mouse <- counts.mouse[common.genes, ]

# Function to perform DESeq2 analysis

In [None]:
# Run a two-group DESeq2 comparison and return the test results together with
# variance-stabilised expression values.
#
# Arguments:
#   counts  Count table with genes in rows and one column per sample; column
#           names must match the values in meta$sample.
#   meta    Sample metadata with the columns `sample`, `treatment` and `rRNA`.
#   group1  Treatment label of the group of interest.
#   group2  Treatment label of the reference group (first factor level).
#
# Returns a list with
#   DEGs  data frame of DESeq2 results for the condition coefficient
#   vsd   output of vst() for the samples of the two groups
compute.DESeq2.stats <- function(counts, meta, group1, group2) {

  # Define groups; group2 comes first so that it is the reference level
  groups <- c(group2, group1)
  missing.groups <- setdiff(groups, meta$treatment)
  if (length(missing.groups) > 0) {
    stop("Treatment group(s) not found in `meta$treatment`: ",
         paste(missing.groups, collapse = ", "), call. = FALSE)
  }

  # as.character() prevents factor sample IDs from being interpreted as
  # integer column positions when indexing `counts`
  samples.sub <- as.character(meta$sample[meta$treatment %in% groups])
  missing.samples <- setdiff(samples.sub, colnames(counts))
  if (length(missing.samples) > 0) {
    stop("Sample(s) missing from the columns of `counts`: ",
         paste(missing.samples, collapse = ", "), call. = FALSE)
  }
  counts.sub <- counts[, samples.sub]
  meta.sub <- meta[match(samples.sub, meta$sample), ]

  # Normalization and differential gene expression, adjusting for the rRNA
  # covariate taken from the metadata
  design <- data.frame(condition = meta.sub$treatment, rRNA = meta.sub$rRNA)
  design$condition <- factor(design$condition, levels = groups)
  dds <- DESeqDataSetFromMatrix(counts.sub, DataFrame(design), ~ condition + rRNA)

  dds <- DESeq(dds)
  vsd <- vst(dds, blind = FALSE)

  # With the design ~ condition + rRNA and a two-level condition factor, the
  # second coefficient (after the intercept) is the group1-vs-group2 effect
  DEGs <- data.frame(results(dds, name = resultsNames(dds)[2]))

  list(DEGs = DEGs, vsd = vsd)
}

# Perform differential gene expression analysis

In [None]:
# Each comparison is run with compute.DESeq2.stats(); the full result (DEGs and
# vsd) stays in the workspace and only the DEG table is written to disk.

# Mouse
DEGs.mouse.acute <- compute.DESeq2.stats(
  counts = counts.mouse, meta = meta.mouse,
  group1 = "A8-A", group2 = "V-A"
)
saveRDS(
  DEGs.mouse.acute$DEGs,
  file = "/projects/mludwig/DVC/output/DEGs/bulk/DESeq2_mouse_acute.rds"
)

DEGs.mouse.chronic <- compute.DESeq2.stats(
  counts = counts.mouse, meta = meta.mouse,
  group1 = "A8-C", group2 = "WM-C"
)
saveRDS(
  DEGs.mouse.chronic$DEGs,
  file = "/projects/mludwig/DVC/output/DEGs/bulk/DESeq2_mouse_chronic.rds"
)

DEGs.mouse.lean <- compute.DESeq2.stats(
  counts = counts.mouse, meta = meta.mouse,
  group1 = "L-A", group2 = "V-A"
)
saveRDS(
  DEGs.mouse.lean$DEGs,
  file = "/projects/mludwig/DVC/output/DEGs/bulk/DESeq2_mouse_lean.rds"
)


# Rat (counts already mapped to mouse gene names)
DEGs.rat.acute <- compute.DESeq2.stats(
  counts = counts.rat, meta = meta.rat,
  group1 = "A8-A", group2 = "V-A"
)
saveRDS(
  DEGs.rat.acute$DEGs,
  file = "/projects/mludwig/DVC/output/DEGs/bulk/DESeq2_rat_acute.rds"
)

DEGs.rat.chronic <- compute.DESeq2.stats(
  counts = counts.rat, meta = meta.rat,
  group1 = "A8-C", group2 = "WM-C"
)
saveRDS(
  DEGs.rat.chronic$DEGs,
  file = "/projects/mludwig/DVC/output/DEGs/bulk/DESeq2_rat_chronic.rds"
)

DEGs.rat.lean <- compute.DESeq2.stats(
  counts = counts.rat, meta = meta.rat,
  group1 = "L-A", group2 = "V-A"
)
saveRDS(
  DEGs.rat.lean$DEGs,
  file = "/projects/mludwig/DVC/output/DEGs/bulk/DESeq2_rat_lean.rds"
)

# Construct logistic regression classifier

In [None]:
# Logistic regression classifier
# For each comparison, the treatment label of every sample is predicted from
# its principal-component scores with a lasso logistic regression fitted on
# all other samples (leave-one-out); the fraction of correct predictions is
# stored as the accuracy of that comparison.
vsd.list <- list(mouse.acute = DEGs.mouse.acute[["vsd"]], rat.acute = DEGs.rat.acute[["vsd"]],
                 mouse.chronic = DEGs.mouse.chronic[["vsd"]], rat.chronic = DEGs.rat.chronic[["vsd"]])


# Treatment label of every sample, in the column order of the matching vsd object
treatment.list <- list(mouse.acute = meta.mouse$treatment[match(colnames(vsd.list[["mouse.acute"]]),
                                                                meta.mouse$sample)],
                       rat.acute = meta.rat$treatment[match(colnames(vsd.list[["rat.acute"]]),
                                                            meta.rat$sample)],
                       mouse.chronic = meta.mouse$treatment[match(colnames(vsd.list[["mouse.chronic"]]),
                                                                  meta.mouse$sample)],
                       rat.chronic = meta.rat$treatment[match(colnames(vsd.list[["rat.chronic"]]),
                                                              meta.rat$sample)])

# One accuracy value per comparison
output <- data.frame(accuracy = rep(NA_real_, length(vsd.list)),
                     row.names = names(vsd.list))

for (k in seq_along(vsd.list)) {

  # PCA of the variance-stabilised data; ntop equals the number of genes, so
  # every gene enters the PCA (ordered by decreasing variance).
  # Note that the PCA is computed once on all samples, i.e. before any sample
  # is held out below.
  vsd <- vsd.list[[k]]
  rv <- rowVars(assay(vsd))
  ntop <- nrow(assay(vsd))
  select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
  pca <- prcomp(t(assay(vsd)[select, ]))
  X <- pca$x
  Y <- treatment.list[[k]]

  # Preallocate the held-out predictions instead of growing a vector
  prediction <- character(length(Y))
  for (j in seq_along(Y)) {

    # Training labels with sample j held out
    Y.train <- Y[-j]

    # Weight each training sample by the fraction of training samples that
    # carry a different label, so that the summed weight of each of the two
    # classes is equal
    weights <- vapply(Y.train,
                      function(x) sum(Y.train != x) / length(Y.train),
                      numeric(1))

    # Lasso (alpha = 1) logistic regression; lambda is chosen by an inner
    # leave-one-out cross-validation (nfolds = number of training samples)
    fit <- cv.glmnet(X[-j, , drop = FALSE], Y.train, alpha = 1,
                     type.measure = "class",
                     nfolds = length(Y.train),
                     standardize = FALSE, intercept = TRUE,
                     family = "binomial", grouped = FALSE,
                     weights = weights)

    # Predict the held-out sample at the lambda with the lowest CV error
    prediction[j] <- as.character(predict(fit, X[j, , drop = FALSE],
                                          type = "class", s = fit$lambda.min))
  }

  accuracy <- sum(prediction == Y) / length(Y)
  output$accuracy[k] <- accuracy
}

output

# Save
saveRDS(output, file = "/projects/mludwig/DVC/output/DEGs/bulk/glmnet.rds")