In [None]:
library(ggplot2)
library(tidyverse)
library(lubridate)
library(Seurat)

In [None]:
# Location of the serialized object to analyse, and the object itself.
filename <- "/gpfs/home/meyin/published_data/parabiosis/pb_combined.rds?download=1"
data <- readRDS(file = filename)

In [None]:
# Preview the first ten rows returned by head() for the loaded object.
head(data, n = 10)

In [None]:
# Size of the object and the names along each dimension.
dim(data)
dimnames(data)

In [None]:
# Column names of the object itself.
colnames(data)

In [None]:
# Names of the metadata columns.
names(data@meta.data)

In [None]:
# Number of entries per low-resolution cell type label.
table(data@meta.data[["Celltype.LowRes"]])

In [None]:
# Quality Control

In [None]:
# Keep only metadata rows whose low-resolution cell type is not "Doublet".
metadata <- filter(data@meta.data, Celltype.LowRes != "Doublet")

In [None]:
# Metadata rows with AgeCond "O_Control" (old control)
ocontroldata <- filter(metadata, AgeCond == "O_Control")
head(ocontroldata)

In [None]:
# Metadata rows with AgeCond "O_Exercise" (old exercise)
oexercisedata <- filter(metadata, AgeCond == "O_Exercise")
head(oexercisedata)

In [None]:
# Metadata rows with AgeCond "Y_Control" (young control)
ycontroldata <- filter(metadata, AgeCond == "Y_Control")
head(ycontroldata)

In [None]:
# Metadata rows with AgeCond "Y_Exercise" (young exercise)
yexercisedata <- filter(metadata, AgeCond == "Y_Exercise")
head(yexercisedata)

In [None]:
# Look at differences in transcriptional noise in different cell types + see how they change with age

In [None]:
# Split the object into a list with one element per value of the "ID" column.
seurat_list <- SplitObject(object = data, split.by = "ID")

In [None]:
# Extract the "data" slot from every element of the split list.
initial_matrices <- lapply(seurat_list, GetAssayData, slot = "data")

In [None]:
# Find transcriptional noise across all individuals and compare

# Individual IDs, with the experimental group and plot colour of each one.
# The three vectors are parallel: element i of each describes the same ID.
idlist <- c("O2", "O3", "O4", "O5", "O6", "O7", "O8",
            "Y1", "Y2", "Y3", "Y4", "Y5", "Y6", "Y7", "Y8")

experimentalgroup <- c("old control", "old exercise", "old exercise", "old control", "old exercise", "old control", "old exercise",
         "young control", "young control", "young exercise", "young exercise", "young control", "young exercise", "young control", "young exercise")

cols <- c("violet", "lavender", "lavender", "violet", "lavender", "violet", "lavender",
          "red", "red", "pink", "pink", "red", "pink", "red", "pink")

# Look each object up by its ID instead of by position: the element order of
# `seurat_list` is decided by SplitObject(), so positional indexing could pair
# a noise value with the wrong entry of `idlist` / `experimentalgroup` / `cols`.
missing_ids <- setdiff(idlist, names(seurat_list))
if (length(missing_ids) > 0) {
    stop("IDs not found in seurat_list: ",
         paste(missing_ids, collapse = ", "), call. = FALSE)
}

# Noise per individual = sd / mean of that individual's nCount_SCT values.
# The result is an unnamed numeric vector in the same order as `idlist`.
noiselist <- vapply(idlist, function(id) {
    counts <- seurat_list[[id]]$nCount_SCT
    sd(counts) / mean(counts)
}, numeric(1), USE.NAMES = FALSE)

In [None]:
# Convert a "/"-separated string of Entrez IDs into a "/"-separated string of
# gene symbols, looked up in org.Mm.eg.db (first match per ID).
idstoname <- function(ids) {
    entrez_ids <- unlist(strsplit(ids, "/"))
    symbols <- mapIds(
        org.Mm.eg.db,
        keys = entrez_ids,
        column = "SYMBOL",
        keytype = "ENTREZID",
        multiVals = "first"
    )
    paste(symbols, collapse = "/")
}

In [None]:
# Distinct low-resolution cell type labels present in the object's metadata.
celltypes <- unique(data@meta.data[["Celltype.LowRes"]])

In [None]:
# Collect the unique gene names listed in the `gene_names` column of `df`.
#
# Each entry of `gene_names` is a "/"-separated string of gene names. Entries
# are split on "/", then de-duplicated keeping first-occurrence order.
# Missing (NA) and empty names are dropped so they are never reported as genes.
#
# df:      data frame (or tibble) with a `gene_names` column.
# returns: character vector of unique gene names; character(0) if none.
extract_gene_names <- function(df) {
  if (!"gene_names" %in% names(df)) {
    stop("`df` must contain a `gene_names` column.", call. = FALSE)
  }

  pieces <- strsplit(as.character(df[["gene_names"]]), "/", fixed = TRUE)
  # unlist() of an empty list is NULL; coerce so the return type is stable.
  pieces <- as.character(unlist(pieces, use.names = FALSE))

  unique(pieces[!is.na(pieces) & nzchar(pieces)])
}

In [None]:
library(ggrepel)
library(clusterProfiler)
library(org.Mm.eg.db)
library(GOSemSim)

# Bring a Seurat object in line with a filtered metadata table and copy the
# table's `group` column onto it.
#
# Assigning a row-filtered data frame straight into `object@meta.data` leaves
# the object with fewer metadata rows than cells. Instead, cells that are
# absent from `meta` are dropped with subset(), and `group` is then added
# through the `$<-` accessor.
#
# object:  Seurat object.
# meta:    data frame with cell names as row names and a `group` column.
# returns: the object restricted to the cells in `meta`, with `group` set.
attach_group_labels <- function(object, meta) {
    cell_ids <- rownames(meta)
    if (!all(cell_ids %in% colnames(object))) {
        stop("Row names of `meta` must be cell names of `object`.", call. = FALSE)
    }
    if (length(cell_ids) < ncol(object)) {
        object <- subset(object, cells = cell_ids)
    }
    object$group <- meta[colnames(object), "group"]
    object
}

# For every cell type: test exercise vs. control with FindMarkers(), run GO
# (BP) enrichment on the significant genes, and write the genes behind the
# `n_terms` lowest-p-value GO terms to "<output_dir>/<celltype>_genes.txt".
#
# Cell types with nothing to export are skipped with a message rather than
# aborting the loop. That covers too few cells in a group, no significant
# genes, no mappable Entrez IDs and no enriched terms.
#
# object:        Seurat object with `Celltype.LowRes` and `group` metadata.
# cell_types:    cell type labels to process.
# control_name:  value of `group` used as ident.2.
# exercise_name: value of `group` used as ident.1.
# output_dir:    directory for the gene list files (created if missing).
# n_terms:       number of top GO terms whose genes are exported.
# min_cells:     minimum cells per group needed to attempt the test.
# returns:       (invisibly) named list of the exported gene vectors.
export_top_go_genes <- function(object, cell_types, control_name, exercise_name,
                                output_dir, n_terms = 10, min_cells = 3) {
    dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
    meta <- object@meta.data
    exported <- list()

    for (celltype in as.character(cell_types)) {
        print(celltype)

        in_type <- !is.na(meta$Celltype.LowRes) & meta$Celltype.LowRes == celltype
        group_sizes <- c(sum(in_type & meta$group %in% control_name),
                         sum(in_type & meta$group %in% exercise_name))
        if (any(group_sizes < min_cells)) {
            message("Skipping ", celltype, ": fewer than ", min_cells,
                    " cells in the control or exercise group.")
            next
        }

        de_results <- FindMarkers(subset(object, cells = colnames(object)[in_type]),
                                  ident.1 = exercise_name, ident.2 = control_name,
                                  group.by = "group", logfc.threshold = 0)

        is_significant <- de_results$p_val_adj < 0.005 & abs(de_results$avg_log2FC) > 0.25
        significantgenes <- rownames(de_results)[which(is_significant)]
        if (length(significantgenes) == 0) {
            message("Skipping ", celltype, ": no significant genes.")
            next
        }

        # Keep Entrez IDs as character: a numeric round trip can rewrite an ID
        # (as.character(100000) is "1e+05"). Unmapped symbols come back as NA.
        geneIDs <- AnnotationDbi::select(org.Mm.eg.db, keys = significantgenes,
                                         columns = "ENTREZID", keytype = "SYMBOL")$ENTREZID
        geneIDs <- unique(geneIDs[!is.na(geneIDs)])
        if (length(geneIDs) == 0) {
            message("Skipping ", celltype, ": no gene mapped to an Entrez ID.")
            next
        }

        ego <- enrichGO(gene = geneIDs,
                        OrgDb = org.Mm.eg.db,
                        keyType = "ENTREZID",
                        ont = "BP",
                        pAdjustMethod = "BH",
                        pvalueCutoff = 0.05,
                        qvalueCutoff = 0.05)
        go_terms <- if (is.null(ego)) NULL else as.data.frame(ego)
        if (is.null(go_terms) || nrow(go_terms) == 0) {
            message("Skipping ", celltype, ": no enriched GO terms.")
            next
        }

        # head() returns at most n_terms rows; `[1:10, ]` would pad with NA
        # rows when fewer terms are available.
        top_terms <- head(go_terms[order(go_terms$pvalue), , drop = FALSE], n_terms)
        # Only the exported terms need their Entrez IDs translated to symbols.
        top_terms$gene_names <- vapply(top_terms$geneID, idstoname, character(1),
                                       USE.NAMES = FALSE)

        all_gene_names <- extract_gene_names(top_terms)
        writeLines(all_gene_names, file.path(output_dir, paste0(celltype, "_genes.txt")))
        exported[[celltype]] <- all_gene_names
    }

    invisible(exported)
}

# ---- Old animals: exercise vs. control ----
controlid <- c("O2", "O5", "O7")
exerciseid <- c("O3", "O4", "O6", "O8")
controlname <- "OldControl"
exercisename <- "OldExercise"
prefix <- "Old"

metadata$group <- ifelse(metadata$ID %in% controlid, controlname,
                     ifelse(metadata$ID %in% exerciseid, exercisename, NA))
data <- attach_group_labels(data, metadata)

export_top_go_genes(data, celltypes, controlname, exercisename,
                    output_dir = "/gpfs/home/meyin/gene_sets/parabiosis/wilcox_top10/old")

In [None]:
library(ggrepel)
library(clusterProfiler)
library(org.Mm.eg.db)
library(GOSemSim)

# ---- Young animals: exercise vs. control ----
controlid <- c("Y1", "Y2", "Y5", "Y7")
exerciseid <- c("Y3", "Y4", "Y6", "Y8")
controlname <- "YoungControl"
exercisename <- "YoungExercise"
prefix <- "Young"

metadata$group <- ifelse(metadata$ID %in% controlid, controlname,
                     ifelse(metadata$ID %in% exerciseid, exercisename, NA))
data <- attach_group_labels(data, metadata)

# Written next to the old results, under wilcox_top10/young/. The previous
# path lacked both "wilcox_top10/" and the separator after "young", so files
# were created as ".../parabiosis/young<celltype>_genes.txt".
export_top_go_genes(data, celltypes, controlname, exercisename,
                    output_dir = "/gpfs/home/meyin/gene_sets/parabiosis/wilcox_top10/young")