Clustering, Annotation and DEG Analysis
# module purge && module load shared slurm python/3.7.x-anaconda
# module load gcc/8.3.0
# module load hdf5_18/1.8.17
# module load R/4.0.2-gccmkl
##------------------------------------
## Libraries
rm(list = ls())
library(dplyr)
library(patchwork)
library(ggplot2)
library(reshape2)
library(clustree)
# library(Matrix)
library(Seurat)
# library(SeuratObject)
set.seed(10)
##------------------------------------
## reference gene symbol list
ref <- read.table("/work/psychiatry/rgree2/SNRNASEQ/04_SEURAT_DOUBLETFINDER/gencode.vM17.protein_coding_gene_id_symbol.txt.gz", header = TRUE, sep = "\t")
##------------------------------------
## load DoubletFinder retained count table
load("/work/psychiatry/rgree2/SNRNASEQ/04_SEURAT_DOUBLETFINDER/01_CS/SLEEPSEQ_SEURAT_CS_DOUBLETFINDER.RData")
# seuObjDF
seuObjDFremoved <- subset(seuObjDF, subset = DF.classifications_0.25_0.005_5770 == "Singlet")
cs.data <- GetAssayData(seuObjDFremoved, slot = "counts") # 21967 75240
save(cs.data, file = "SLEEPSEQ_COUNTS_CS_FILTERED.RData")
##------------------------------------
rm(list = ls())
## load combined count table
load("SLEEPSEQ_COUNTS_CS_FILTERED.RData")
# cs.data
## Initialize the Seurat object with the raw (non-normalized data).
seuObj <- CreateSeuratObject(counts = cs.data, project = "SLEEPSEQ_CS")
print(dim(seuObj)) ## 21967 75240
##------------------------------------
## Add and update meta data
metaTemp <- as.data.frame(seuObj@meta.data)
metaTemp2 <- as.data.frame(matrix(unlist(strsplit(row.names(metaTemp), "_")), ncol = 3, byrow = TRUE))
row.names(metaTemp2) <- row.names(metaTemp)
metaData <- merge(metaTemp, metaTemp2, by = "row.names")
row.names(metaData) <- metaData$Row.names
metaData$Row.names <- NULL
colnames(metaData) <- c("Ident", "nUMI", "nGenes", "Condition", "Sample", "CellBarcode")
metaData$Batch <- metaData$Sample
metaData$Batch <- gsub("A1", "1", metaData$Batch)
metaData$Batch <- gsub("A2", "1", metaData$Batch)
metaData$Batch <- gsub("A5", "1", metaData$Batch)
metaData$Batch <- gsub("A6", "1", metaData$Batch)
metaData$Batch <- gsub("X2", "2", metaData$Batch)
metaData$Batch <- gsub("X3", "2", metaData$Batch)
metaSample <- metaData$Sample
names(metaSample) <- row.names(metaData)
metaCond <- metaData$Condition
names(metaCond) <- row.names(metaData)
metaBatch <- metaData$Batch
names(metaBatch) <- row.names(metaData)
seuObj$Condition <- seuObj$orig.ident
seuObj$Sample <- metaSample
seuObj$ConditionSample <- paste(seuObj$Condition, seuObj$Sample, sep = "_")
seuObj$Batch <- metaBatch
##------------------------------------
## Calculate Percent Mito
seuObj[["pMito_RNA"]] <- PercentageFeatureSet(seuObj, pattern = "^mt-")
## Set default identities to GenoAgeSample
Idents(seuObj) <- "ConditionSample"
table(seuObj@active.ident)
# CS_A1 CS_A2 CS_A5 CS_A6 CS_X2 CS_X3
# 9462 9774 12334 10084 18460 15126
seuObj <- subset(seuObj, idents = c("CS_X2", "CS_X3"), invert = TRUE)
table(seuObj@active.ident)
# CS_A1 CS_A2 CS_A5 CS_A6
# 9462 9774 12334 10084
## Visualize Data QC
p1 <- VlnPlot(seuObj, features = c("nFeature_RNA", "nCount_RNA", "pMito_RNA"), ncol = 3, pt.size = 0)
p2 <- FeatureScatter(seuObj, feature1 = "nCount_RNA", feature2 = "pMito_RNA")
p3 <- FeatureScatter(seuObj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
ggsave(filename = "SEURAT_SLEEPSEQ_QC_1.pdf", plot = p1, width = 8, height = 4, units = "in", dpi = 150)
ggsave(filename = "SEURAT_SLEEPSEQ_QC_2.pdf", plot = p2, width = 5, height = 4, units = "in", dpi = 150)
ggsave(filename = "SEURAT_SLEEPSEQ_QC_3.pdf", plot = p3, width = 5, height = 4, units = "in", dpi = 150)
##------------------------------------
## Data Filtering
seuObj <- subset(seuObj, subset = nCount_RNA < 20000 & pMito_RNA < 0.5)
print(dim(seuObj)) ## 21967 41398
##------------------------------------
## Data Normalization
seuObj <- NormalizeData(seuObj)
##------------------------------------
## Identify top 2000 highly variable genes
seuObj <- FindVariableFeatures(seuObj, selection.method = "vst", nfeatures = 2000)
## Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(seuObj), 10)
## plot variable features with and without labels
p4 <- VariableFeaturePlot(seuObj)
p5 <- LabelPoints(plot = p4, points = top10, repel = TRUE)
ggsave(filename = "SEURAT_SLEEPSEQ_VARGENES_1.pdf", plot = p4, width = 8, height = 4, units = "in", dpi = 150)
ggsave(filename = "SEURAT_SLEEPSEQ_VARGENES_2.pdf", plot = p5, width = 8, height = 4, units = "in", dpi = 150)
##------------------------------------
## Data Scaling
all.genes <- rownames(seuObj)
seuObj <- ScaleData(seuObj, features = all.genes, vars.to.regress = c("nCount_RNA", "pMito_RNA"))
##------------------------------------
## PCA & Jackstraw
## Compute PCA
seuObj <- RunPCA(seuObj, features = VariableFeatures(object = seuObj), npcs = 100)
## Compute and Score Jackstraw
seuObj <- JackStraw(seuObj, num.replicate = 100, dims = 100)
seuObj <- ScoreJackStraw(seuObj, dims = 1:100)
## Plot PCA loadings and Jackstraw scores
p6 <- ElbowPlot(seuObj, ndims = 100)
p7 <- JackStrawPlot(seuObj, dims = 1:100)
ggsave(filename = "SEURAT_SLEEPSEQ_PCA_1.pdf", plot = p6, width = 6, height = 4, units = "in", dpi = 150)
ggsave(filename = "SEURAT_SLEEPSEQ_PCA_2.pdf", plot = p7, width = 12, height = 6, units = "in", dpi = 150)
# save(seuObj, file = "SLEEPSEQ_SEURAT_CS_NORM_SCALE_PCA.RData")
##------------------------------------
# rm(list = ls())
# load("SLEEPSEQ_SEURAT_CS_NORM_SCALE_PCA.RData")
# # seuObj
## Data Clustering
## IDENTIFY PCS
pcScores <- seuObj@reductions$pca@jackstraw$overall.p.values
pcScoresNoSign <- pcScores[pcScores[,2] > 0.05,]
selpcs <- min(pcScoresNoSign[,1]) - 1
print(selpcs)
seuObj <- FindNeighbors(seuObj, dims = 1:selpcs)
seuObj <- FindClusters(seuObj, resolution = c(0.2, 0.4, 0.6, 0.8, 1.0, 0.8, 1.4, 1.6, 1.8, 2.0), n.iter = 1000)
seuObj <- RunUMAP(seuObj, dims = 1:selpcs, n.epochs = 200, min.dist = 0.001, spread = 5)
##------------------------------------
## clustree
seutree <- clustree(seuObj, prefix = "RNA_snn_res.", node_colour = "sc3_stability") # + scale_color_brewer(palette = "Set1") + scale_edge_color_continuous(low = "blue", high = "red")
ggsave(filename = "SLEEPSEQ_SEURAT_CS_CLUSTREE.pdf", plot = seutree, width = 24, height = 24, units = "in", dpi = 150)
##------------------------------------
## UMAP plots and nUMI violin plots
## for all resolutions
ResolutionList <- grep("RNA_snn_res", colnames(seuObj@meta.data), value = TRUE)
for (Resolution in ResolutionList)
{
print(paste("====> RESOLUTION ", Resolution, sep = ""))
pdf(paste0("SLEEPSEQ_SEURAT_CS_UMAP_RES_", Resolution, ".pdf"), width = 7, height = 6)
g <- DimPlot(object = seuObj, label = TRUE, reduction = "umap", group.by = Resolution)
print(g)
dev.off()
pdf(paste0("SLEEPSEQ_SEURAT_CS_UMAP_VIOLIN_nUMI_RES_", Resolution, ".pdf"), width = 7, height = 3)
v <- VlnPlot(object = seuObj, features = "nCount_RNA", ncol = 1, pt.size = 0, group.by = Resolution)
print(v)
dev.off()
}
##------------------------------------
## UMAP/Feature plots for marker genes
# mygenes <- c("Mki67", "Sox4", "Sox11", "Foxp1", "Foxp2", "Drd", "Drd2", "Tac1", "Penk", "Casz1", "Aqp4", "Olig1", "Cx3cr1", "Flt1", "Slc17a7", "Dlx2", "Npy", "Ascl1", "Sp9", "Ppp1r1b")
mygenes <- c("Aqp4", "Aldoc", "Tac1", "Clock", "Thy1", "Slc17a6", "Slc17a7", "Gad1", "Foxp1", "Foxp2", "Olig1", "Olig2", "Mobp", "Mbp", "Mag", "Cldn5", "Cx3cr1", "Vtn", "Mrc1")
fpl1 <- FeaturePlot(object = seuObj, features = mygenes, cols = c("gray90", "blue"), reduction = "umap", ncol = 5, pt.size = 0.1, min.cutoff = 0, max.cutoff = 3, order = TRUE, raster = TRUE)
ggsave(filename = "SLEEPSEQ_SEURAT_PLOT_FeaturePlot_orderT.pdf", plot = fpl1, width = 36, height = 24, units = "in", dpi = 300)
fpl2 <- FeaturePlot(object = seuObj, features = mygenes, cols = c("gray90", "blue"), reduction = "umap", ncol = 5, pt.size = 0.1, min.cutoff = 0, max.cutoff = 3, order = FALSE, raster = TRUE)
ggsave(filename = "SLEEPSEQ_SEURAT_PLOT_FeaturePlot_orderF.pdf", plot = fpl2, width = 36, height = 24, units = "in", dpi = 300)
umap1 <- DimPlot(object = seuObj, group.by = "RNA_snn_res.1.4", pt.size = 0.1, label = TRUE, reduction = "umap", raster = TRUE) + NoLegend()
ggsave(filename = "SLEEPSEQ_SEURAT_PLOT_DimPlot_Res_1.4.pdf", plot = umap1, width = 6, height = 6, units = "in", dpi = 300)
##------------------------------------
## Save RData
save(seuObj, file = "SLEEPSEQ_SEURAT_CS.RData")
# ##------------------------------------
# ## Find Cluster Markers
# rm(list = ls())
# load("SLEEPSEQ_SEURAT_CS.RData")
# # seuObj
# Idents(seuObj) <- "RNA_snn_res.1.4"
# seuObj.markers <- FindAllMarkers(seuObj, only.pos = TRUE, min.pct = 0, logfc.threshold = 0)
# write.table(seuObj.markers, "SLEEPSEQ_SEURAT_RES_1.4_CLUSTER_MARKERS.txt", row.names = TRUE, col.names = TRUE, quote = FALSE, sep = "\t")
# module purge && module load shared slurm python/3.7.x-anaconda
# module load gcc/8.3.0
# module load hdf5_18/1.8.17
# module load R/4.0.2-gccmkl
##------------------------------------
## Libraries
rm(list = ls())
library(dplyr)
library(patchwork)
library(ggplot2)
library(reshape2)
library(clustree)
# library(Matrix)
library(Seurat)
# library(SeuratObject)
set.seed(10)
##------------------------------------
## reference gene symbol list
ref <- read.table("/work/psychiatry/rgree2/SNRNASEQ/04_SEURAT_DOUBLETFINDER/gencode.vM17.protein_coding_gene_id_symbol.txt.gz", header = TRUE, sep = "\t")
##------------------------------------
## load DoubletFinder retained count table
load("/work/psychiatry/rgree2/SNRNASEQ/04_SEURAT_DOUBLETFINDER/02_SD/SLEEPSEQ_SEURAT_SD_DOUBLETFINDER.RData")
# seuObjDF
seuObjDFremoved <- subset(seuObjDF, subset = DF.classifications_0.25_0.005_6209 == "Singlet")
sd.data <- GetAssayData(seuObjDFremoved, slot = "counts") # 21967 80353
save(sd.data, file = "SLEEPSEQ_COUNTS_SD_FILTERED.RData")
##------------------------------------
rm(list = ls())
## load combined count table
load("SLEEPSEQ_COUNTS_SD_FILTERED.RData")
# sd.data
## Initialize the Seurat object with the raw (non-normalized data).
seuObj <- CreateSeuratObject(counts = sd.data, project = "SLEEPSEQ_SD")
print(dim(seuObj)) ## 21967 80353
##------------------------------------
## Add and update meta data
metaTemp <- as.data.frame(seuObj@meta.data)
metaTemp2 <- as.data.frame(matrix(unlist(strsplit(row.names(metaTemp), "_")), ncol = 3, byrow = TRUE))
row.names(metaTemp2) <- row.names(metaTemp)
metaData <- merge(metaTemp, metaTemp2, by = "row.names")
row.names(metaData) <- metaData$Row.names
metaData$Row.names <- NULL
colnames(metaData) <- c("Ident", "nUMI", "nGenes", "Condition", "Sample", "CellBarcode")
metaData$Batch <- metaData$Sample
metaData$Batch <- gsub("A3", "1", metaData$Batch)
metaData$Batch <- gsub("A4", "1", metaData$Batch)
metaData$Batch <- gsub("A7", "1", metaData$Batch)
metaData$Batch <- gsub("A8", "1", metaData$Batch)
metaData$Batch <- gsub("X5", "2", metaData$Batch)
metaData$Batch <- gsub("X7", "2", metaData$Batch)
metaSample <- metaData$Sample
names(metaSample) <- row.names(metaData)
metaCond <- metaData$Condition
names(metaCond) <- row.names(metaData)
metaBatch <- metaData$Batch
names(metaBatch) <- row.names(metaData)
seuObj$Condition <- seuObj$orig.ident
seuObj$Sample <- metaSample
seuObj$ConditionSample <- paste(seuObj$Condition, seuObj$Sample, sep = "_")
seuObj$Batch <- metaBatch
##------------------------------------
## Calculate Percent Mito
seuObj[["pMito_RNA"]] <- PercentageFeatureSet(seuObj, pattern = "^mt-")
## Set default identities to GenoAgeSample
Idents(seuObj) <- "ConditionSample"
table(seuObj@active.ident)
# SD_A3 SD_A4 SD_A7 SD_A8 SD_X5 SD_X7
# 9868 10926 12412 10066 17982 19099
seuObj <- subset(seuObj, idents = c("SD_X5", "SD_X7"), invert = TRUE)
table(seuObj@active.ident)
# SD_A3 SD_A4 SD_A7 SD_A8
# 9868 10926 12412 10066
## Visualize Data QC
p1 <- VlnPlot(seuObj, features = c("nFeature_RNA", "nCount_RNA", "pMito_RNA"), ncol = 3, pt.size = 0)
p2 <- FeatureScatter(seuObj, feature1 = "nCount_RNA", feature2 = "pMito_RNA")
p3 <- FeatureScatter(seuObj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
ggsave(filename = "SEURAT_SLEEPSEQ_QC_1.pdf", plot = p1, width = 8, height = 4, units = "in", dpi = 150)
ggsave(filename = "SEURAT_SLEEPSEQ_QC_2.pdf", plot = p2, width = 5, height = 4, units = "in", dpi = 150)
ggsave(filename = "SEURAT_SLEEPSEQ_QC_3.pdf", plot = p3, width = 5, height = 4, units = "in", dpi = 150)
##------------------------------------
## Data Filtering
seuObj <- subset(seuObj, subset = nCount_RNA < 20000 & pMito_RNA < 0.5)
print(dim(seuObj)) ## 21967 80112
##------------------------------------
## Data Normalization
seuObj <- NormalizeData(seuObj)
##------------------------------------
## Identify top 2000 highly variable genes
seuObj <- FindVariableFeatures(seuObj, selection.method = "vst", nfeatures = 2000)
## Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(seuObj), 10)
## plot variable features with and without labels
p4 <- VariableFeaturePlot(seuObj)
p5 <- LabelPoints(plot = p4, points = top10, repel = TRUE)
ggsave(filename = "SEURAT_SLEEPSEQ_VARGENES_1.pdf", plot = p4, width = 8, height = 4, units = "in", dpi = 150)
ggsave(filename = "SEURAT_SLEEPSEQ_VARGENES_2.pdf", plot = p5, width = 8, height = 4, units = "in", dpi = 150)
##------------------------------------
## Data Scaling
all.genes <- rownames(seuObj)
seuObj <- ScaleData(seuObj, features = all.genes, vars.to.regress = c("nCount_RNA", "pMito_RNA"))
##------------------------------------
## PCA & Jackstraw
## Compute PCA
seuObj <- RunPCA(seuObj, features = VariableFeatures(object = seuObj), npcs = 100)
## Compute and Score Jackstraw
seuObj <- JackStraw(seuObj, num.replicate = 100, dims = 100)
seuObj <- ScoreJackStraw(seuObj, dims = 1:100)
## Plot PCA loadings and Jackstraw scores
p6 <- ElbowPlot(seuObj, ndims = 100)
p7 <- JackStrawPlot(seuObj, dims = 1:100)
ggsave(filename = "SEURAT_SLEEPSEQ_PCA_1.pdf", plot = p6, width = 6, height = 4, units = "in", dpi = 150)
ggsave(filename = "SEURAT_SLEEPSEQ_PCA_2.pdf", plot = p7, width = 12, height = 6, units = "in", dpi = 150)
# save(seuObj, file = "SLEEPSEQ_SEURAT_CS_NORM_SCALE_PCA.RData")
##------------------------------------
# rm(list = ls())
# load("SLEEPSEQ_SEURAT_CS_NORM_SCALE_PCA.RData")
# # seuObj
## Data Clustering
## IDENTIFY PCS
pcScores <- seuObj@reductions$pca@jackstraw$overall.p.values
pcScoresNoSign <- pcScores[pcScores[,2] > 0.05,]
selpcs <- min(pcScoresNoSign[,1]) - 1
print(selpcs)
seuObj <- FindNeighbors(seuObj, dims = 1:selpcs)
seuObj <- FindClusters(seuObj, resolution = c(0.2, 0.4, 0.6, 0.8, 1.0, 0.8, 1.4, 1.6, 1.8, 2.0), n.iter = 1000)
seuObj <- RunUMAP(seuObj, dims = 1:selpcs, n.epochs = 200, min.dist = 0.001, spread = 5)
##------------------------------------
## clustree
seutree <- clustree(seuObj, prefix = "RNA_snn_res.", node_colour = "sc3_stability") # + scale_color_brewer(palette = "Set1") + scale_edge_color_continuous(low = "blue", high = "red")
ggsave(filename = "SLEEPSEQ_SEURAT_SD_CLUSTREE.pdf", plot = seutree, width = 24, height = 24, units = "in", dpi = 150)
##------------------------------------
## UMAP plots and nUMI violin plots
## for all resolutions
ResolutionList <- grep("RNA_snn_res", colnames(seuObj@meta.data), value = TRUE)
for (Resolution in ResolutionList)
{
print(paste("====> RESOLUTION ", Resolution, sep = ""))
pdf(paste0("SLEEPSEQ_SEURAT_SD_UMAP_RES_", Resolution, ".pdf"), width = 7, height = 6)
g <- DimPlot(object = seuObj, label = TRUE, reduction = "umap", group.by = Resolution)
print(g)
dev.off()
pdf(paste0("SLEEPSEQ_SEURAT_SD_UMAP_VIOLIN_nUMI_RES_", Resolution, ".pdf"), width = 7, height = 3)
v <- VlnPlot(object = seuObj, features = "nCount_RNA", ncol = 1, pt.size = 0, group.by = Resolution)
print(v)
dev.off()
}
##------------------------------------
## UMAP/Feature plots for marker genes
# mygenes <- c("Mki67", "Sox4", "Sox11", "Foxp1", "Foxp2", "Drd", "Drd2", "Tac1", "Penk", "Casz1", "Aqp4", "Olig1", "Cx3cr1", "Flt1", "Slc17a7", "Dlx2", "Npy", "Ascl1", "Sp9", "Ppp1r1b")
mygenes <- c("Aqp4", "Aldoc", "Tac1", "Clock", "Thy1", "Slc17a6", "Slc17a7", "Gad1", "Foxp1", "Foxp2", "Olig1", "Olig2", "Mobp", "Mbp", "Mag", "Cldn5", "Cx3cr1", "Vtn", "Mrc1")
fpl1 <- FeaturePlot(object = seuObj, features = mygenes, cols = c("gray90", "blue"), reduction = "umap", ncol = 5, pt.size = 0.1, min.cutoff = 0, max.cutoff = 3, order = TRUE, raster = TRUE)
ggsave(filename = "SLEEPSEQ_SEURAT_PLOT_FeaturePlot_orderT.pdf", plot = fpl1, width = 36, height = 24, units = "in", dpi = 300)
fpl2 <- FeaturePlot(object = seuObj, features = mygenes, cols = c("gray90", "blue"), reduction = "umap", ncol = 5, pt.size = 0.1, min.cutoff = 0, max.cutoff = 3, order = FALSE, raster = TRUE)
ggsave(filename = "SLEEPSEQ_SEURAT_PLOT_FeaturePlot_orderF.pdf", plot = fpl2, width = 36, height = 24, units = "in", dpi = 300)
umap1 <- DimPlot(object = seuObj, group.by = "RNA_snn_res.1.4", pt.size = 0.1, label = TRUE, reduction = "umap", raster = TRUE) + NoLegend()
ggsave(filename = "SLEEPSEQ_SEURAT_PLOT_DimPlot_Res_1.4.pdf", plot = umap1, width = 6, height = 6, units = "in", dpi = 300)
##------------------------------------
## Save RData
save(seuObj, file = "SLEEPSEQ_SEURAT_SD.RData")
rm(list = ls())
library(Seurat)
library(dplyr)
library(Matrix)
library(ggplot2)
library(gridExtra)
library(reshape2)
library(ggrepel)
library(reticulate)
library(clustree)
##-------------------------------------------------------
## SEURAT ANALYSIS | LOAD INDIVIDUAL DATA
##-------------------------------------------------------
print("SLEEPSEQ_CS")
load("/work/psychiatry/rgree2/SNRNASEQ/06_SEURAT_woAmbient_woDoublets/01_CS/SLEEPSEQ_SEURAT_CS.RData")
sleepseq.cs <- seuObj
rm(seuObj)
DefaultAssay(sleepseq.cs) <- "RNA"
print(dim(sleepseq.cs))
print("SLEEPSEQ_SD")
load("/work/psychiatry/rgree2/SNRNASEQ/06_SEURAT_woAmbient_woDoublets/02_SD/SLEEPSEQ_SEURAT_SD.RData")
sleepseq.sd <- seuObj
rm(seuObj)
DefaultAssay(sleepseq.sd) <- "RNA"
print(dim(sleepseq.sd))
##-------------------------------------------------------
## SEURAT ANALYSIS | INTEGRATE DATA
##-------------------------------------------------------
## create a list of seurat objects
sleepseq.list <- c(sleepseq.cs, sleepseq.sd)
## standard preprocessing, identify variable features individually
# normalize and identify variable features for each dataset independently
sleepseq.list <- lapply(X = sleepseq.list, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})
# select features that are repeatedly variable across datasets for integration
sleepseq.features <- SelectIntegrationFeatures(object.list = sleepseq.list)
## identify integration anchors
sleepseq.anchors <- FindIntegrationAnchors(object.list = sleepseq.list, anchor.features = sleepseq.features)
## integrate datasets using anchors
sleepseq.integrated <- IntegrateData(anchorset = sleepseq.anchors)
## scaling, pca and clustering
## switch to integrated assay
DefaultAssay(sleepseq.integrated) <- "integrated"
# save(sleepseq.integrated, sleepseq.anchors, file = "SEURAT_ORGANOIDS_DATA_FILT_NORM_INTEGRATED.RData")
# Run the standard workflow for visualization and clustering
myres <- c(0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0)
sleepseq.integrated <- ScaleData(sleepseq.integrated, verbose = FALSE)
sleepseq.integrated <- RunPCA(sleepseq.integrated, npcs = 30, verbose = FALSE)
sleepseq.integrated <- RunUMAP(sleepseq.integrated, reduction = "pca", dims = 1:30, n.epochs = 200, min.dist = 0.001, spread = 5)
sleepseq.integrated <- FindNeighbors(object = sleepseq.integrated, dims = 1:30)
sleepseq.integrated <- FindClusters(object = sleepseq.integrated, resolution = myres, n.iter = 100)
save(sleepseq.integrated, sleepseq.anchors, file = "SLEEPSEQ_DATA_FILT_NORM_PCA_CLU_INTEGRATED.RData")
##-------------------------------------------------------
## SEURAT ANALYSIS | FIGURES
##-------------------------------------------------------
seutree <- clustree(sleepseq.integrated, prefix = "integrated_snn_res.", node_colour = "sc3_stability") # + scale_color_brewer(palette = "Set1") + scale_edge_color_continuous(low = "blue", high = "red")
ggsave(filename = "SLEEPSEQ_DATA_FILT_NORM_PCA_CLU_INTEGRATED_CLUSTREE.pdf", plot = seutree, width = 24, height = 24, units = "in", dpi = 150)
# ResolutionList <- grep("integrated_snn_res", colnames(sleepseq.integrated@meta.data), value = TRUE)
# for (Resolution in ResolutionList){
# print(Resolution)
# pdf(paste0("EP_ORG_DATA_FILT_NORM_PCA_CLU_INTEGRATED_UMAP_RES_", Resolution, ".pdf"), width=7, height=7)
# g <- DimPlot(object = sleepseq.integrated, label = TRUE, reduction = "umap", group.by = Resolution)
# print(g)
# dev.off()
# }
ResolutionList <- grep("integrated_snn_res", colnames(sleepseq.integrated@meta.data), value = TRUE)
for (Resolution in ResolutionList)
{
print(Resolution)
pdf(paste0("SLEEPSEQ_DATA_FILT_NORM_PCA_CLU_INTEGRATED_UMAP_RES_", Resolution, ".pdf"), width=8, height=7)
g <- DimPlot(object = sleepseq.integrated, label = TRUE, reduction = "umap", group.by = Resolution, raster = TRUE)
print(g)
dev.off()
pdf(paste0("SLEEPSEQ_DATA_INTEGRATE_VIOLIN_nUMI_RES_", Resolution, ".pdf"), width = 9, height = 3)
v <- VlnPlot(object = sleepseq.integrated, features = "nCount_RNA", ncol = 1, pt.size = 0, group.by = Resolution)
print(v)
dev.off()
}
plotCluUMAP1 <- DimPlot(object = sleepseq.integrated, group.by = "Condition", pt.size = 0.1, reduction = "umap", label = TRUE, label.size = 3, repel = TRUE, raster = TRUE)
ggsave(filename = "SEURAT_SLEEPSEQ_DATA_UMAP_CONDITION.pdf", plot = plotCluUMAP1, width = 9, height = 8, units = "in", dpi = 150)
plotCluUMAP1b <- DimPlot(object = sleepseq.integrated, group.by = "Condition", split.by = "Condition", pt.size = 0.1, reduction = "umap", label = FALSE, label.size = 3, repel = TRUE, raster = TRUE)
ggsave(filename = "SEURAT_SLEEPSEQ_DATA_UMAP_CONDITION_FACET.pdf", plot = plotCluUMAP1b, width = 17, height = 8, units = "in", dpi = 150)
mygenes <- c("Aqp4", "Aldoc", "Tac1", "Clock", "Thy1", "Slc17a6", "Slc17a7", "Gad1", "Foxp1", "Foxp2", "Olig1", "Olig2", "Mobp", "Mbp", "Mag", "Cldn5", "Cx3cr1", "Vtn", "Mrc1")
DefaultAssay(sleepseq.integrated) <- "RNA"
fpl1 <- FeaturePlot(object = sleepseq.integrated, features = mygenes, cols = c("gray90", "blue"), reduction = "umap", ncol = 3, pt.size = 0.05, min.cutoff = 0, max.cutoff = 3, order = TRUE, raster = TRUE)
ggsave(filename = "SEURAT_SLEEPSEQ_DATA_FeaturePlot_orderT.pdf", plot = fpl1, width = 14, height = 28, units = "in", dpi = 150, useDingbats=FALSE, )
fpl2 <- FeaturePlot(object = sleepseq.integrated, features = mygenes, cols = c("gray90", "blue"), reduction = "umap", ncol = 3, pt.size = 0.05, min.cutoff = 0, max.cutoff = 3, order = FALSE, raster = TRUE)
ggsave(filename = "SEURAT_SLEEPSEQ_DATA_FeaturePlot_orderF.pdf", plot = fpl2, width = 14, height = 28, units = "in", dpi = 150, useDingbats=FALSE)
##-------------------------------------------------------
## SEURAT ANALYSIS | CLUSTER MARKERS
##-------------------------------------------------------
rm(list = ls())
load("SLEEPSEQ_DATA_FILT_NORM_PCA_CLU_INTEGRATED.RData")
# sleepseq.integrated
DefaultAssay(object = sleepseq.integrated) <- "integrated"
Idents(sleepseq.integrated) <- "integrated_snn_res.1.2"
seuMarkers <- FindAllMarkers(object = sleepseq.integrated)
write.table(seuMarkers, "SLEEPSEQ_DATA_FILT_NORM_PCA_CLU_INTEGRATED_DEG.txt", row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")
save(seuMarkers, file = "SLEEPSEQ_DATA_FILT_NORM_PCA_CLU_INTEGRATED_DEG.RData")
##-------------------------------------------------------
## SEURAT ANALYSIS | ANNOTATE CLUSTERS
##-------------------------------------------------------
rm(list = ls())
library(Seurat)
library(dplyr)
library(Matrix)
library(ggplot2)
library(gridExtra)
library(reshape2)
library(ggrepel)
library(reticulate)
library(WGCNA)
library(dplyr)
library(tidyr)
## Load cluster markers
cluMarkers <- read.table("SLEEPSEQ_DATA_FILT_NORM_PCA_CLU_INTEGRATED_DEG.txt", sep = "\t", header = TRUE)
tab <- cluMarkers
tab <- tab[tab$p_val_adj <= 0.05 & tab$avg_log2FC >= 0.75 & tab$pct.1 >= 0.75,]
# tab <- tab[tab$p_val_adj <= 0.05 & tab$avg_log2FC >= 0.25,]
# tab <- tab[tab$p_val_adj <= 0.05 & tab$pct.1 >= 0.25,]
tab <- tab[c(7,6)]
tab$cluster <- as.factor(paste("Cluster_", sprintf("%02d", as.numeric(as.character(tab$cluster))), sep=""))
colnames(tab)=c("Gene","DEFINITION")
tab$DEFINITION <- as.factor(tab$DEFINITION)
Genes=as.data.frame(table(tab$DEFINITION))
## Load reference data (BICCN)
biccn_master <- read.table("/work/psychiatry/rgree2/RESOURCES/DATASETS/2021_BICCN/2020-03-03645C-s4/Supplementary_Table_6.txt", header = TRUE, sep = ",")
biccn_master_up <- biccn_master[biccn_master$direction == "up",]
biccn_master_up2 <- biccn_master_up[,c("genes", "cl1_cluster_label")]
colnames(biccn_master_up2) <- c("gene", "celltype")
biccn_list <- split(biccn_master_up2, biccn_master_up2$celltype)
biccn_list2 <- lapply(biccn_list, function(x) { x[1:50,] } )
GeneSets <- biccn_list2
for(i in 1:length(GeneSets)){
colnames(GeneSets[[i]])[1] <- "Gene"
}
ln=length(GeneSets)
cl=length(Genes$Var1)
TEMP=list()
INT=list()
for (i in 1:ln)
{
TEMP[[i]]=tab[tab$Gene %in% GeneSets[[i]]$Gene,]
INT[[i]]=as.data.frame(table(TEMP[[i]]$DEFINITION))
}
names(INT)=names(GeneSets)
names(TEMP)=names(GeneSets)
# Create the matrix for each GeneSet
NROWS <- sapply(GeneSets,nrow)
#
# GeneSet
# + -
# +-------+-------+
# + | a | b |
# Module +-------+-------+
# - | c | d |
# +-------+-------+
for (i in 1:length(INT))
{
INT[[i]]$b <- NROWS[[i]]-INT[[i]]$Freq
INT[[i]]$c <- Genes$Freq-INT[[i]]$Freq
INT[[i]]$d <- 15585-Genes$Freq-nrow(GeneSets[[i]]) #19776 #15585 #13517 #nrow(tab) #8321 # nrow(cluMarkers)(genes in seurat object)
}
# sum(Genes$Freq)
RunFisher <- function(row, alt = 'greater', cnf = 0.85) {
f <- fisher.test(matrix(row, nrow = 2), alternative = alt, conf.level = cnf)
return(c(row,
P_val = f$p.value,
LogP = -log10(f$p.value),
OR = f$estimate[[1]],
OR_Low = f$conf.int[1],
OR_Up = f$conf.int[2]))
}
# run
FisherMat=list()
for (i in 1:length(INT))
{
FisherMat[[i]] <- t(apply(INT[[i]][,2:5], 1, RunFisher))
rownames(FisherMat[[i]]) <- INT[[i]]$Var1
FisherMat[[i]] <- FisherMat[[i]][rownames(FisherMat[[i]]) != "grey",]
}
names(FisherMat)<-names(INT)
save(FisherMat, TEMP, file= "SLEEPSEQ_SEURAT_FisherOutput_BICCN_Enrich.RData")
# Create matrices of Pval
tmp<-list()
FisherP<-matrix()
rowNames <- rownames(FisherMat[[1]])
colNames <- names(FisherMat)
for (i in 1:length(INT))
{
tmp[[i]] <- cbind(as.numeric(FisherMat[[i]][,5]))
FisherP <- do.call(cbind,tmp)
}
rownames(FisherP) <- rowNames
colnames(FisherP) <- colNames
# Create matrices of OR
tmp<-list()
FisherOR<-matrix()
rowNames <- rownames(FisherMat[[1]])
colNames <- names(FisherMat)
for (i in 1:length(INT))
{
tmp[[i]] <- cbind(as.numeric(FisherMat[[i]][,6]))
FisherOR <- do.call(cbind,tmp)
}
rownames(FisherOR) <- rowNames
colnames(FisherOR) <- colNames
# Pval Adjusted
library(magrittr)
FisherAdj <- FisherP %>% as.matrix %>% as.vector %>% p.adjust(method='fdr') %>% matrix(ncol=ncol(FisherP))
rownames(FisherAdj) <- rowNames
colnames(FisherAdj) <- colNames
FisherAdj[FisherAdj>0.05]=1
FisherOR[FisherOR < 1]=0
pdf("SLEEPSEQ_SEURAT_FisherOutput_BICCN_Enrich.pdf", width=48, height=24, pointsize=12)
par(mar=c(15, 7, 2, 2))
df=-log10(FisherAdj)
LabelMat = paste(signif(FisherOR, 2), "\n(",signif(FisherAdj, 1), ")", sep = "")
LabelMat[LabelMat == "0\n(1)"] <- NA
labeledHeatmap(Matrix = df,
xLabels = colnames(df),
yLabels = rownames(df),
colorLabels =FALSE,
colors=colorRampPalette(c("white", "red"))(50),
textMatrix=LabelMat,
setStdMargins = FALSE,
cex.text = 0.5,
xLabelsAngle = 90)
dev.off()
FisherORt <- as.data.frame(t(FisherOR))
colnames(FisherORt) <- paste(colnames(FisherORt), "OR", sep = "_")
FisherAdjt <- as.data.frame(t(FisherAdj))
colnames(FisherAdjt) <- paste(colnames(FisherAdjt), "Pval", sep = "_")
FisherData <- merge(FisherORt, FisherAdjt, by = "row.names")
row.names(FisherData) <- FisherData$Row.names
FisherData$Row.names <- NULL
FisherData2 <- FisherData[,order(colnames(FisherData))]
write.table(FisherData2, "SLEEPSEQ_SEURAT_FisherOutput_BICCN_Enrich_PlotData.txt", row.names = T, col.names = T, quote = F, sep = "\t")
sink("SLEEPSEQ_SEURAT_BICCN_AnnotationData.txt")
aa <- FisherData2
for(i in 1:ncol(aa))
{
if(i %% 2 != 0)
{
cluor <- i
clup <- i + 1
print("------------------")
bb <- aa[,c(cluor, clup)]
cc <- bb[order(bb[,1], decreasing = T),]
dd <- bb[order(bb[,2], decreasing = F),]
print(gsub("_OR", "", colnames(cc)[1]))
print(cc[1:5,])
print(dd[1:5,])
print("------------------")
}
}
sink()
##-------------------------------------------------------
## SEURAT ANALYSIS | PLOTS & DATA
##-------------------------------------------------------
rm(list = ls())
library(Seurat)
library(dplyr)
library(Matrix)
library(ggplot2)
library(gridExtra)
library(reshape2)
library(ggrepel)
library(reticulate)
library(clustree)
library(scCustomize)
load("SLEEPSEQ_DATA_FILT_NORM_PCA_CLU_INTEGRATED.RData")
# sleepseq.integrated
DefaultAssay(object = sleepseq.integrated) <- "integrated"
Idents(sleepseq.integrated) <- "integrated_snn_res.1.2"
sleepseq.integrated@active.ident <- factor(x = sleepseq.integrated@active.ident, levels = seq(0, max(as.numeric(sleepseq.integrated$integrated_snn_res.1.2) - 1)))
## UMAP PLOTS
p1 <- DimPlot_scCustom(seurat_object = sleepseq.integrated, pt.size = 0.01, group.by = "integrated_snn_res.1.2", raster = TRUE, ggplot_default_colors = TRUE, label = TRUE, label.size = 3) + NoLegend()
ggsave(filename = "SEURAT_SLEEPSEQ_UMAP_CLUSTERS_RES_1.2.pdf", plot = p1, width = 6, height = 6, units = "in", dpi = 300)
p2 <- DimPlot_scCustom(seurat_object = sleepseq.integrated, pt.size = 0.01, group.by = "Condition", ggplot_default_colors = TRUE, raster = TRUE, label = FALSE) #+ NoLegend()
ggsave(filename = "SEURAT_SLEEPSEQ_UMAP_CLUSTERS_CONDITION.pdf", plot = p2, width = 6.5, height = 6, units = "in", dpi = 300)
p3a <- DimPlot_All_Samples(seurat_object = sleepseq.integrated, meta_data_column = "ConditionSample", num_col = 6, pt.size = 0.01, raster = TRUE, label = FALSE, colors_use = DiscretePalette_scCustomize(num_colors = 8, palette = "varibow")) #+ NoLegend()
ggsave(filename = "SEURAT_SLEEPSEQ_UMAP_CLUSTERS_SAMPLE_FACET.pdf", plot = p3a, width = 18, height = 6, units = "in", dpi = 300)
p3b <- DimPlot_scCustom(seurat_object = sleepseq.integrated, pt.size = 0.01, group.by = "ConditionSample", colors_use = DiscretePalette_scCustomize(num_colors = 8, palette = "varibow"), raster = TRUE, label = FALSE) #+ NoLegend()
ggsave(filename = "SEURAT_SLEEPSEQ_UMAP_CLUSTERS_SAMPLE.pdf", plot = p3b, width = 7, height = 6, units = "in", dpi = 300)
## BARPLOT PLOTS
cellsPerCluster <- as.data.frame.matrix(table(sleepseq.integrated$integrated_snn_res.1.2, sleepseq.integrated@meta.data$Condition))
cellsPerCluster$Cluster <- paste("Cluster", sprintf("%02d", as.numeric(row.names(cellsPerCluster))), sep = "_")
write.table(cellsPerCluster, "SEURAT_SLEEPSEQ_CELLS_BY_CLUSTER_BY_CONDITION.tsv", row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")
cellsPerCluster2 <- melt(cellsPerCluster)
colnames(cellsPerCluster2) <- c("CLUSTER", "CONDITION", "CELLS")
p4 <- ggplot(cellsPerCluster2) +
geom_bar(aes(x = CLUSTER, y = CELLS, fill = CONDITION), stat = "identity", position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
NULL
ggsave(filename = "SEURAT_SLEEPSEQ_CELLS_BY_CLUSTER_BY_CONDITION.pdf", plot = p4, width = 14, height = 4, units = "in", dpi = 300)
samplePerCluster <- as.data.frame.matrix(table(sleepseq.integrated$integrated_snn_res.1.2, sleepseq.integrated@meta.data$ConditionSample))
samplePerCluster$Cluster <- paste("Cluster", sprintf("%02d", as.numeric(row.names(samplePerCluster))), sep = "_")
write.table(samplePerCluster, "SEURAT_SLEEPSEQ_CELLS_BY_CLUSTER_BY_SAMPLE.tsv", row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")
samplePerCluster2 <- melt(samplePerCluster)
colnames(samplePerCluster2) <- c("CLUSTER", "SAMPLE", "CELLS")
p5 <- ggplot(samplePerCluster2) +
geom_bar(aes(x = CLUSTER, y = CELLS, fill = SAMPLE), stat = "identity", position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
NULL
ggsave(filename = "SEURAT_SLEEPSEQ_CELLS_BY_CLUSTER_BY_SAMPLE.pdf", plot = p5, width = 14, height = 4, units = "in", dpi = 300)
## VOILIN PLOTS
p6 <- Stacked_VlnPlot(seurat_object = sleepseq.integrated, features = c("nCount_RNA", "nFeature_RNA"), x_lab_rotate = TRUE, ggplot_default_colors = TRUE)
ggsave(filename = "SEURAT_SLEEPSEQ_VIOLINPLOT_nUMI_nGENES.pdf", plot = p6, width = 14, height = 4, units = "in", dpi = 300)
DefaultAssay(object = sleepseq.integrated) <- "RNA"
mygenes <- c("Aqp4", "Aldoc", "Tac1", "Clock", "Thy1", "Slc17a6", "Slc17a7", "Gad1", "Foxp1", "Foxp2", "Olig1", "Olig2", "Mobp", "Mbp", "Mag", "Cldn5", "Cx3cr1", "Vtn", "Mrc1")
p7 <- Stacked_VlnPlot(seurat_object = sleepseq.integrated, features = mygenes, x_lab_rotate = TRUE, ggplot_default_colors = TRUE)
ggsave(filename = "SEURAT_SLEEPSEQ_VIOLINPLOT_MARKER_GENES.pdf", plot = p7, width = 14, height = 14, units = "in", dpi = 300)
## FEATURE PLOTS
DefaultAssay(object = sleepseq.integrated) <- "RNA"
mygenes <- c("Aqp4", "Aldoc", "Tac1", "Clock", "Thy1", "Slc17a6", "Slc17a7", "Gad1", "Foxp1", "Foxp2", "Olig1", "Olig2", "Mobp", "Mbp", "Mag", "Cldn5", "Cx3cr1", "Vtn", "Mrc1")
p8 <- FeaturePlot_scCustom(seurat_object = sleepseq.integrated, features = mygenes, order = FALSE, pt.size = 0.01, raster = TRUE, num_columns = 5)
ggsave(filename = "SEURAT_SLEEPSEQ_FEATUREPLOT_MARKER_GENES.pdf", plot = p8, width = 21, height = 14, units = "in", dpi = 300)
p9 <- Cluster_Highlight_Plot(seurat_object = sleepseq.integrated, cluster_name = "6", highlight_color = "forestgreen", background_color = "lightgray")
ggsave(filename = "TEMP_UMAP_HIGHLIGHT.pdf", plot = p9, width = 7, height = 6, units = "in", dpi = 300)
##-------------------------------------------------------
## SEURAT ANALYSIS | UPDATE ANNOTATION
##-------------------------------------------------------
rm(list = ls())
library(Seurat)
library(dplyr)
library(Matrix)
library(ggplot2)
library(gridExtra)
library(reshape2)
library(ggrepel)
library(reticulate)
library(clustree)
library(scCustomize)
load("SLEEPSEQ_DATA_FILT_NORM_PCA_CLU_INTEGRATED.RData")
# sleepseq.integrated
DefaultAssay(object = sleepseq.integrated) <- "integrated"
Idents(sleepseq.integrated) <- "integrated_snn_res.1.2"
sleepseq.integrated@active.ident <- factor(x = sleepseq.integrated@active.ident, levels = seq(0, max(as.numeric(sleepseq.integrated$integrated_snn_res.1.2) - 1)))
sleepseq.meta <- as.data.frame(sleepseq.integrated@meta.data)
sleepseq.meta$CellBarcode <- row.names(sleepseq.meta)
sleepseq.meta$Cluster <- paste("Cluster", sprintf("%02d", as.numeric(sleepseq.integrated@active.ident) - 1), sep = "_")
annotation <- read.table("SEURAT_SLEEPSEQ_ANNOTATION_BY_CLUSTER.tsv", sep = "\t", header = TRUE)
sleepseq.meta.updated <- merge(sleepseq.meta, annotation, by = "Cluster")
annot1 <- sleepseq.meta.updated$Annotation1
names(annot1) <- sleepseq.meta.updated$CellBarcode
annot2 <- sleepseq.meta.updated$Annotation2
names(annot2) <- sleepseq.meta.updated$CellBarcode
sleepseq.integrated$Annotation1 <- annot1
sleepseq.integrated$Annotation2 <- annot2
sleepseq.integrated$Annotation3 <- paste(sleepseq.integrated$Annotation1, sleepseq.integrated$integrated_snn_res.1.2, sep = "_")
sleepseq.integrated$Annotation4 <- paste(sleepseq.integrated$Annotation2, sleepseq.integrated$integrated_snn_res.1.2, sep = "_")
Idents(sleepseq.integrated) <- "Annotation1"
p0 <- Cluster_Highlight_Plot(seurat_object = sleepseq.integrated, cluster_name = "UN", highlight_color = "grey50", background_color = "grey90", raster = TRUE)
ggsave(filename = "SEURAT_SLEEPSEQ_UMAP_CELLTYPE_0.pdf", plot = p0, width = 7, height = 6, units = "in", dpi = 300)
table(sleepseq.integrated$Annotation1)
# Astro Endo EX IN Micro Oligo OPC Peri PVM UN VLMC
# 8788 721 42045 10062 4468 7973 2511 486 228 4481 2312
sleepseq.integrated <- subset(sleepseq.integrated, idents = "UN", invert = TRUE)
table(sleepseq.integrated$Annotation1)
# Astro Endo EX IN Micro Oligo OPC Peri PVM VLMC
# 8788 721 42045 10062 4468 7973 2511 486 228 2312
## UMAP PLOTS
p1 <- DimPlot_scCustom(seurat_object = sleepseq.integrated, pt.size = 0.01, group.by = "Annotation1", raster = TRUE, label = TRUE, label.size = 3, colors_use = DiscretePalette_scCustomize(num_colors = length(unique(sort(sleepseq.integrated$Annotation1))), palette = "varibow")) + NoLegend()
ggsave(filename = "SEURAT_SLEEPSEQ_UMAP_CELLTYPE_1.pdf", plot = p1, width = 6, height = 6, units = "in", dpi = 300)
p2 <- DimPlot_scCustom(seurat_object = sleepseq.integrated, pt.size = 0.01, group.by = "Annotation2", raster = TRUE, label = FALSE, colors_use = DiscretePalette_scCustomize(num_colors = length(unique(sort(sleepseq.integrated$Annotation2))), palette = "varibow")) #+ NoLegend()
ggsave(filename = "SEURAT_SLEEPSEQ_UMAP_CELLTYPE_2.pdf", plot = p2, width = 9, height = 6, units = "in", dpi = 300)
p2b <- DimPlot_scCustom(seurat_object = sleepseq.integrated, pt.size = 0.01, group.by = "Annotation2", raster = TRUE, label = TRUE, label.size = 2, colors_use = DiscretePalette_scCustomize(num_colors = length(unique(sort(sleepseq.integrated$Annotation2))), palette = "varibow")) + NoLegend()
ggsave(filename = "SEURAT_SLEEPSEQ_UMAP_CELLTYPE_2b.pdf", plot = p2b, width = 6, height = 6, units = "in", dpi = 300)
p3 <- DimPlot_scCustom(seurat_object = sleepseq.integrated, pt.size = 0.01, group.by = "Annotation4", raster = TRUE, label = TRUE, label.size = 2, colors_use = DiscretePalette_scCustomize(num_colors = length(unique(sort(sleepseq.integrated$Annotation4))), palette = "varibow")) + NoLegend()
ggsave(filename = "SEURAT_SLEEPSEQ_UMAP_CELLTYPE_3.pdf", plot = p3, width = 6, height = 6, units = "in", dpi = 300)
## VOILIN PLOTS
p6 <- Stacked_VlnPlot(seurat_object = sleepseq.integrated, group.by = "Annotation1", features = c("nCount_RNA", "nFeature_RNA"), x_lab_rotate = TRUE, ggplot_default_colors = TRUE)
ggsave(filename = "SEURAT_SLEEPSEQ_CELLTYPE_1_VIOLINPLOT_nUMI_nGENES.pdf", plot = p6, width = 8, height = 4, units = "in", dpi = 300)
p7 <- Stacked_VlnPlot(seurat_object = sleepseq.integrated, group.by = "Annotation2", features = c("nCount_RNA", "nFeature_RNA"), x_lab_rotate = TRUE, ggplot_default_colors = TRUE)
ggsave(filename = "SEURAT_SLEEPSEQ_CELLTYPE_2_VIOLINPLOT_nUMI_nGENES.pdf", plot = p7, width = 12, height = 4, units = "in", dpi = 300)
p8 <- Stacked_VlnPlot(seurat_object = sleepseq.integrated, group.by = "Annotation4", features = c("nCount_RNA", "nFeature_RNA"), x_lab_rotate = TRUE, ggplot_default_colors = TRUE)
ggsave(filename = "SEURAT_SLEEPSEQ_CELLTYPE_3_VIOLINPLOT_nUMI_nGENES.pdf", plot = p8, width = 16, height = 4, units = "in", dpi = 300)
## BARPLOT PLOTS
cellsPerCelltype <- as.data.frame.matrix(table(sleepseq.integrated$Annotation1, sleepseq.integrated@meta.data$Condition))
cellsPerCelltype$CellType <- row.names(cellsPerCelltype)
write.table(cellsPerCelltype, "SEURAT_SLEEPSEQ_CELLS_BY_CELLTYPE_1_BY_CONDITION.tsv", row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")
cellsPerCelltype2 <- melt(cellsPerCelltype)
colnames(cellsPerCelltype2) <- c("CELLTYPE", "CONDITION", "CELLS")
p4 <- ggplot(cellsPerCelltype2) +
geom_bar(aes(x = CELLTYPE, y = CELLS, fill = CONDITION), stat = "identity", position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
NULL
ggsave(filename = "SEURAT_SLEEPSEQ_CELLS_BY_CELLTYPE_1_BY_CONDITION.pdf", plot = p4, width = 14, height = 4, units = "in", dpi = 300)
cellsPerCelltype <- as.data.frame.matrix(table(sleepseq.integrated$Annotation2, sleepseq.integrated@meta.data$Condition))
cellsPerCelltype$CellType <- row.names(cellsPerCelltype)
write.table(cellsPerCelltype, "SEURAT_SLEEPSEQ_CELLS_BY_CELLTYPE_2_BY_CONDITION.tsv", row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")
cellsPerCelltype2 <- melt(cellsPerCelltype)
colnames(cellsPerCelltype2) <- c("CELLTYPE", "CONDITION", "CELLS")
p4 <- ggplot(cellsPerCelltype2) +
geom_bar(aes(x = CELLTYPE, y = CELLS, fill = CONDITION), stat = "identity", position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
NULL
ggsave(filename = "SEURAT_SLEEPSEQ_CELLS_BY_CELLTYPE_2_BY_CONDITION.pdf", plot = p4, width = 14, height = 4, units = "in", dpi = 300)
cellsPerCelltype <- as.data.frame.matrix(table(sleepseq.integrated$Annotation4, sleepseq.integrated@meta.data$Condition))
cellsPerCelltype$CellType <- row.names(cellsPerCelltype)
write.table(cellsPerCelltype, "SEURAT_SLEEPSEQ_CELLS_BY_CELLTYPE_4_BY_CONDITION.tsv", row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")
cellsPerCelltype2 <- melt(cellsPerCelltype)
colnames(cellsPerCelltype2) <- c("CELLTYPE", "CONDITION", "CELLS")
p4 <- ggplot(cellsPerCelltype2) +
geom_bar(aes(x = CELLTYPE, y = CELLS, fill = CONDITION), stat = "identity", position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
NULL
ggsave(filename = "SEURAT_SLEEPSEQ_CELLS_BY_CELLTYPE_4_BY_CONDITION.pdf", plot = p4, width = 14, height = 4, units = "in", dpi = 300)
save(sleepseq.integrated, file = "SLEEPSEQ_DATA_FILT_NORM_PCA_CLU_INTEGRATED_ANNOTATED.RData")
##-------------------------------------------------------
## SEURAT ANALYSIS | UPDATE ANNOTATION
##-------------------------------------------------------
rm(list = ls())
library(Seurat)
library(dplyr)
library(Matrix)
library(ggplot2)
library(gridExtra)
library(reshape2)
library(ggrepel)
library(reticulate)
library(clustree)
library(scCustomize)
load("SLEEPSEQ_DATA_FILT_NORM_PCA_CLU_INTEGRATED_ANNOTATED.RData")
# sleepseq.integrated
sleepseq.integrated$Annotation5 <- gsub("EX_L2-3_IT", "EX_IT", sleepseq.integrated$Annotation2)
sleepseq.integrated$Annotation5 <- gsub("EX_L4-5_IT", "EX_IT", sleepseq.integrated$Annotation5)
sleepseq.integrated$Annotation5 <- gsub("EX_L5_IT", "EX_IT", sleepseq.integrated$Annotation5)
sleepseq.integrated$Annotation5 <- gsub("EX_L6_IT", "EX_IT", sleepseq.integrated$Annotation5)
sleepseq.integrated$Annotation5 <- gsub("EX_L5_ET", "EX_ET_NP_CT", sleepseq.integrated$Annotation5)
sleepseq.integrated$Annotation5 <- gsub("EX_L5-6_NP", "EX_ET_NP_CT", sleepseq.integrated$Annotation5)
sleepseq.integrated$Annotation5 <- gsub("EX_L6_CT", "EX_ET_NP_CT", sleepseq.integrated$Annotation5)
sleepseq.integrated$Annotation5 <- gsub("EX_L6b", "EX_ET_NP_CT", sleepseq.integrated$Annotation5)
sleepseq.integrated$Annotation5 <- gsub("IN_Lamp5", "IN", sleepseq.integrated$Annotation5)
sleepseq.integrated$Annotation5 <- gsub("IN_Pvalb", "IN", sleepseq.integrated$Annotation5)
sleepseq.integrated$Annotation5 <- gsub("IN_Sncg", "IN", sleepseq.integrated$Annotation5)
sleepseq.integrated$Annotation5 <- gsub("IN_Sst", "IN", sleepseq.integrated$Annotation5)
sleepseq.integrated$Annotation5 <- gsub("IN_Vip", "IN", sleepseq.integrated$Annotation5)
## UMAP PLOTS
p1 <- DimPlot_scCustom(seurat_object = sleepseq.integrated, pt.size = 0.01, group.by = "Annotation5", raster = TRUE, label = TRUE, label.size = 3, colors_use = DiscretePalette_scCustomize(num_colors = length(unique(sort(sleepseq.integrated$Annotation5))), palette = "varibow")) + NoLegend()
ggsave(filename = "SEURAT_SLEEPSEQ_UMAP_CELLTYPE_4.pdf", plot = p1, width = 6, height = 6, units = "in", dpi = 300)
# module purge && module load shared slurm python/3.7.x-anaconda
# module load gcc/8.3.0
# module load hdf5_18/1.8.17
# module load R/4.0.2-gccmkl
rm(list = ls())
library(Seurat)
library(dplyr)
library(Matrix)
library(ggplot2)
library(gridExtra)
library(reshape2)
library(ggrepel)
library(reticulate)
library(plyr)
library(tidyverse)
library(tidyr)
library(patchwork)
library(scran)
library(scater)
library(SingleCellExperiment)
library(edgeR)
##-------------------------------------------------------
## DEG | PSEUDOBULK
##-------------------------------------------------------
load("/work/psychiatry/rgree2/SNRNASEQ/06_SEURAT_woAmbient_woDoublets/04_CS_SD_USE_THIS/SLEEPSEQ_DATA_FILT_NORM_PCA_CLU_INTEGRATED_ANNOTATED.RData")
# sleepseq.integrated
sleepseq.integrated$Batch_cDNA <- gsub("CS_A1|CS_A2|SD_A3|SD_A4", 1, sleepseq.integrated$ConditionSample)
sleepseq.integrated$Batch_cDNA <- gsub("CS_A5|CS_A6|SD_A7|SD_A8", 2, sleepseq.integrated$Batch_cDNA)
DefaultAssay(sleepseq.integrated) <- "RNA"
sleepseq.integrated <- NormalizeData(sleepseq.integrated)
table(sleepseq.integrated@meta.data$Condition)
# CS SD
# 38048 41546
table(sleepseq.integrated@meta.data$ConditionSample)
# CS_A1 CS_A2 CS_A5 CS_A6 SD_A3 SD_A4 SD_A7 SD_A8
# 6812 9496 11916 9824 9541 10447 12030 9528
table(sleepseq.integrated@meta.data$Annotation1)
# Astro Endo EX IN Micro Oligo OPC Peri PVM VLMC
# 8788 721 42045 10062 4468 7973 2511 486 228 2312
table(sleepseq.integrated@meta.data$Annotation2)
# Astrocytes Endothelial EX_L2-3_IT EX_L4-5_IT
# 8788 721 12448 8660
# EX_L5_ET EX_L5_IT EX_L5-6_NP EX_L6_CT
# 2549 5488 1583 7527
# EX_L6_IT EX_L6b IN_Lamp5 IN_Pvalb
# 3154 636 769 3071
# IN_Sncg IN_Sst IN_Vip Microglia
# 2771 2232 1219 4468
# Oligodendrocytes OPC Pericytes PVM
# 7973 2511 486 228
# VLMC
# 2312
table(sleepseq.integrated@meta.data$Annotation4)
# Astrocytes_0 Astrocytes_25 Astrocytes_47 Endothelial_28
# 7605 1078 105 591
# Endothelial_43 EX_L2-3_IT_11 EX_L2-3_IT_22 EX_L2-3_IT_4
# 130 2800 1386 4188
# EX_L2-3_IT_40 EX_L2-3_IT_44 EX_L2-3_IT_5 EX_L4-5_IT_16
# 159 127 3788 2175
# EX_L4-5_IT_37 EX_L4-5_IT_59 EX_L4-5_IT_7 EX_L4-5_IT_9
# 189 30 3204 3062
# EX_L5_ET_13 EX_L5_IT_15 EX_L5_IT_8 EX_L5-6_NP_19
# 2549 2350 3138 1583
# EX_L6_CT_17 EX_L6_CT_2 EX_L6_CT_48 EX_L6_CT_51
# 2128 5218 103 78
# EX_L6_IT_12 EX_L6_IT_29 EX_L6b_27 IN_Lamp5_26
# 2610 544 636 769
# IN_Pvalb_10 IN_Pvalb_36 IN_Pvalb_58 IN_Sncg_21
# 2828 206 37 1464
# IN_Sncg_23 IN_Sst_18 IN_Sst_42 IN_Sst_50
# 1307 1897 150 96
# IN_Sst_53 IN_Sst_60 IN_Vip_24 Microglia_3
# 61 28 1219 4468
# Oligodendrocytes_1 Oligodendrocytes_32 Oligodendrocytes_33 OPC_14
# 7152 429 392 2415
# OPC_49 Pericytes_30 PVM_35 VLMC_20
# 96 486 228 1540
# VLMC_31 VLMC_38 VLMC_45
# 475 181 116
Idents(sleepseq.integrated) <- "Annotation2"
table(sleepseq.integrated@active.ident)
# Astrocytes Endothelial EX_L2-3_IT EX_L4-5_IT
# 8788 721 12448 8660
# EX_L5_ET EX_L5_IT EX_L5-6_NP EX_L6_CT
# 2549 5488 1583 7527
# EX_L6_IT EX_L6b IN_Lamp5 IN_Pvalb
# 3154 636 769 3071
# IN_Sncg IN_Sst IN_Vip Microglia
# 2771 2232 1219 4468
# Oligodendrocytes OPC Pericytes PVM
# 7973 2511 486 228
# VLMC
# 2312
for (myclu in unique(sort(sleepseq.integrated@active.ident)))
{
## select a cell-type
cluSelected <- myclu
print(cluSelected)
sleepseq.selected <- subset(x = sleepseq.integrated, idents = c(cluSelected))
## create SCE object for all genes
sleepseqSCE <- as.SingleCellExperiment(sleepseq.selected)
sleepseqGroups <- colData(sleepseqSCE)[, c("ConditionSample", "Condition")]
sleepseqPseudoSCE <- sumCountsAcrossCells(sleepseqSCE, sleepseqGroups)
sleepseqAggMat <- sleepseqPseudoSCE@assays@data$sum
colnames(sleepseqAggMat) <- colData(sleepseqPseudoSCE)[['ConditionSample']]
## filter genes
csExp = which(rowSums(sleepseqAggMat[, grepl('CS', colnames(sleepseqAggMat))] <= 10) == 0) %>% names
sdExp = which(rowSums(sleepseqAggMat[, grepl('SD', colnames(sleepseqAggMat))] <= 10) == 0) %>% names
expgns = Reduce(union, list(csExp, sdExp))
## create SCE object for filtered genes
DefaultAssay(sleepseq.selected) <- "RNA"
selSCE <- as.SingleCellExperiment(sleepseq.selected)
selGroups <- colData(selSCE)[, c("ConditionSample", "Condition", "Sample", "Batch_cDNA")]
selPseudoSCE <- sumCountsAcrossCells(selSCE, selGroups)
selGroups <- colData(selPseudoSCE)[, c("ConditionSample", "Condition", "Sample", "Batch_cDNA")]
selGroups$ConditionSample <- factor(selGroups$ConditionSample)
selGroups$Condition <- factor(selGroups$Condition)
selGroups$Sample <- factor(selGroups$Sample)
selGroups$Batch_cDNA <- factor(selGroups$Batch_cDNA)
selAggMat <- selPseudoSCE@assays@data$sum
colnames(selAggMat) = colData(selPseudoSCE)[['ConditionSample']]
selAggMat <- selAggMat[expgns,]
## perform differential gene expression
selDGEL <- DGEList(counts = selAggMat)
selDGEL <- calcNormFactors(selDGEL)
selDesign <- model.matrix(~Batch_cDNA + Condition, data = selGroups)
selDGEL <- estimateDisp(selDGEL, selDesign)
selFit <- glmFit(selDGEL,selDesign)
selLrt <- glmLRT(selFit)
selRes <- topTags(selLrt, n = nrow(selAggMat), sort.by = 'PValue') %>% as.data.frame
degTableSel <- subset(selRes, FDR <= 0.05)
## write dge table
write.table(selRes, paste("SEURAT_SLEEPSEQ_DATA_", cluSelected, "_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", sep = ""), row.names = T, col.names = T, quote = F, sep = "\t")
## average gene expression
sleepseq.selected$CellType <- paste(Idents(sleepseq.selected), sleepseq.selected$Condition, sep = "_")
Idents(sleepseq.selected) <- "CellType"
avg.exp <- as.data.frame(log1p(AverageExpression(sleepseq.selected, verbose = FALSE)$RNA))
avg.exp$Gene <- row.names(avg.exp)
# avgExp <- avg.exp[row.names(avg.exp) %in% row.names(selRes),]
degavg <- merge(avg.exp, selRes, by = "row.names", all.y = TRUE)
row.names(degavg) <- degavg$Row.names
degavg$Row.names <- NULL
## write average gene expression table
write.table(avg.exp, paste("SEURAT_SLEEPSEQ_DATA_", cluSelected, "_AVG_EXP.txt", sep = ""), row.names = T, col.names = T, quote = F, sep = "\t")
## save dge table and avg gene expression
save(selRes, avg.exp, degavg, file = paste("SEURAT_SLEEPSEQ_DATA_", cluSelected, "_PSEUDOBULK.RData", sep = ""))
}
# module purge && module load shared slurm python/3.7.x-anaconda
# module load gcc/8.3.0
# module load hdf5_18/1.8.17
# module load R/4.0.2-gccmkl
rm(list = ls())
# .libPaths("/home2/akulk1/R/x86_64-redhat-linux-gnu-library/3.5")
library(Seurat)
library(dplyr)
library(Matrix)
library(ggplot2)
library(gridExtra)
library(reshape2)
library(ggrepel)
library(reticulate)
library(RColorBrewer)
library(UpSetR)
##-------------------------------------------------------
## DEGs
##-------------------------------------------------------
filterDEGs <- function(degtable, pvalcutoff, l2fccutoff)
{
deg.cs.sd.temp <- read.table(degtable, sep = "\t", header = TRUE)
deg.cs.sd <- deg.cs.sd.temp
deg.cs.sd.sig.up <- deg.cs.sd[deg.cs.sd$FDR <= pvalcutoff & deg.cs.sd$logFC >= l2fccutoff,]
deg.cs.sd.sig.dn <- deg.cs.sd[deg.cs.sd$FDR <= pvalcutoff & deg.cs.sd$logFC <= -l2fccutoff,]
return(list("UP" = row.names(deg.cs.sd.sig.up), "DOWN" = row.names(deg.cs.sd.sig.dn)))
}
# degtable <- "SEURAT_SLEEPSEQ_DATA_Astrocytes_SD_x_CS_DEG_TABLE_WILCOX_FULL.txt"
# degtable <- "SEURAT_SLEEPSEQ_DATA_Astrocytes_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt"
pvalcutoff <- 0.05
l2fccutoff <- 0.1375 # 10% difference
astro_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_Astrocytes_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
endo_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_Endothelial_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl23it_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_EX_L2-3_IT_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl45it_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_EX_L4-5_IT_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl5et_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_EX_L5_ET_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl5it_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_EX_L5_IT_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl56np_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_EX_L5-6_NP_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl6b_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_EX_L6b_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl6ct_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_EX_L6_CT_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl6it_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_EX_L6_IT_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
inlamp5_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_IN_Lamp5_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
inpvalb_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_IN_Pvalb_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
insncg_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_IN_Sncg_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
insst_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_IN_Sst_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
invip_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_IN_Vip_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
micro_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_Microglia_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
oligo_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_Oligodendrocytes_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
opc_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_OPC_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
peri_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_Pericytes_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
pvm_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_PVM_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
vlmc_list <- filterDEGs("SEURAT_SLEEPSEQ_DATA_VLMC_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
degcountlist <- list("astro_up" = length(astro_list$UP), "astro_dn" = length(astro_list$DOWN),
"endo_up" = length(endo_list$UP), "endo_dn" = length(endo_list$DOWN),
"exl23it_up" = length(exl23it_list$UP), "exl23it_dn" = length(exl23it_list$DOWN),
"exl45it_up" = length(exl45it_list$UP), "exl45it_dn" = length(exl45it_list$DOWN),
"exl5et_up" = length(exl5et_list$UP), "exl5et_dn" = length(exl5et_list$DOWN),
"exl5it_up" = length(exl5it_list$UP), "exl5it_dn" = length(exl5it_list$DOWN),
"exl56np_up" = length(exl56np_list$UP), "exl56np_dn" = length(exl56np_list$DOWN),
"exl6b_up" = length(exl6b_list$UP), "exl6b_dn" = length(exl6b_list$DOWN),
"exl6ct_up" = length(exl6ct_list$UP), "exl6ct_dn" = length(exl6ct_list$DOWN),
"exl6it_up" = length(exl6it_list$UP), "exl6it_dn" = length(exl6it_list$DOWN),
"inlamp5_up" = length(inlamp5_list$UP), "inlamp5_dn" = length(inlamp5_list$DOWN),
"inpvalb_up" = length(inpvalb_list$UP), "inpvalb_dn" = length(inpvalb_list$DOWN),
"insncg_up" = length(insncg_list$UP), "insncg_dn" = length(insncg_list$DOWN),
"insst_up" = length(insst_list$UP), "insst_dn" = length(insst_list$DOWN),
"invip_up" = length(invip_list$UP), "invip_dn" = length(invip_list$DOWN),
"micro_up" = length(micro_list$UP), "micro_dn" = length(micro_list$DOWN),
"oligo_up" = length(oligo_list$UP), "oligo_dn" = length(oligo_list$DOWN),
"opc_up" = length(opc_list$UP), "opc_dn" = length(opc_list$DOWN),
"peri_up" = length(peri_list$UP), "peri_dn" = length(peri_list$DOWN),
"pvm_up" = length(pvm_list$UP), "pvm_dn" = length(pvm_list$DOWN),
"vlmc_up" = length(vlmc_list$UP), "vlmc_dn" = length(vlmc_list$DOWN))
degup <- as.data.frame(t(as.data.frame(degcountlist[grepl("_up", names(degcountlist))])))
degup$celltype <- gsub("_up", "", row.names(degup))
degdn <- as.data.frame(t(as.data.frame(degcountlist[grepl("_dn", names(degcountlist))])))
degdn$celltype <- gsub("_dn", "", row.names(degdn))
degcounts <- merge(degup, degdn, by = "celltype", all.x = TRUE, incomparables = 0)
colnames(degcounts) <- c("CELLTYPE", "UP", "DOWN")
degcounts[is.na(degcounts)] <- 0
write.table(degcounts, "SLEEPSEQ_DEG_COMPARISON_L2FC_0.1375.tsv", row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")
p1 <- ggplot(degcounts) +
geom_hline(yintercept = 0) +
geom_segment(aes(x = CELLTYPE, xend = CELLTYPE, y = 0, yend = UP)) +
geom_point(data = degcounts, aes(x = CELLTYPE, y = UP, color = CELLTYPE)) +
geom_segment(aes(x = CELLTYPE, xend = CELLTYPE, y = 0, yend = -DOWN)) +
geom_point(data = degcounts, aes(x = CELLTYPE, y = -DOWN, color = CELLTYPE)) +
theme_bw() +
ylim(-500,500) + #ylim(-150,150) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
theme(legend.position = "none") +
xlab("CellTypes") +
ylab("# DEGs") +
NULL
ggsave(filename = "SLEEPSEQ_DEG_COMPARISON_L2FC_0.1375.pdf", plot = p1, width = 8, height = 4, units = "in", dpi = 300, useDingbats = FALSE)
load("/work/psychiatry/rgree2/SNRNASEQ/06_SEURAT_woAmbient_woDoublets/04_CS_SD_USE_THIS/SLEEPSEQ_DATA_FILT_NORM_PCA_CLU_INTEGRATED_ANNOTATED.RData")
# sleepseq.integrated
##-------------------------------------------------------
## PREPARE REFERENCE GENE LISTS
## ASD genes
load("ASD_SFARI.RData")
# GeneSets
asd_human <- as.vector(GeneSets$ASD$Gene)
asd_mouse <- na.omit(convert_human_to_mouse_symbols(symbols = asd_human)) # 1045 genes
asdscored_human <- as.vector(GeneSets$ASD_Scored$Gene)
asdscored_mouse <- na.omit(convert_human_to_mouse_symbols(symbols = asdscored_human))
rm(GeneSets)
## Chris Cowan Mef2c
load("GeneSets_Mef2c_cKO.RData")
# GeneSets
mef2c_cko_up <- as.vector(GeneSets$Mef2c_cKO_UpReg$Gene) # 478 genes
mef2c_cko_dn <- as.vector(GeneSets$Mef2c_cKO_DownReg$Gene) # 598 genes
rm(GeneSets)
## DEGs comparing WT to HDAC4cn
hd4cn_deg <- read.table("DEG_HD4CN_X_MCH_CTX.txt", header = TRUE, sep = "\t")
hd4cn_up <- hd4cn_deg[hd4cn_deg$FDR <= 0.05 & hd4cn_deg$log2FC >= 0.3, "Gene"] # 323 genes
hd4cn_dn <- hd4cn_deg[hd4cn_deg$FDR <= 0.05 & hd4cn_deg$log2FC <= -0.3, "Gene"] # 313 genes
## Synaptic plasticity genes
sscgenesTemp <- as.data.frame(scan("Synaptic_Shaping_Components.txt", what = "", sep = "\n")) #279 genes
sscgenes <- as.vector(unique(sscgenesTemp[,1])) ## 865 genes
# ## create a list for all reference gene lists
# myref <- list("01_MEF2C_CKO_DEG_UP" = mef2c_cko_up,
# "02_HD4CN_DEG_UP" = hd4cn_up,
# "03_MEF2C_CKO_DEG_DOWN" = mef2c_cko_dn,
# "04_HD4CN_DEG_DOWN" = hd4cn_dn,
# "05_SSC_GENES" = sscgenes,
# "06_ASD_ALL_GENES" = asd_mouse)
# save(myref, file = "SLEEPSEQ_REFERENCE_GENE_LISTS.RData")
myref <- list("01_SSC_GENES" = sscgenes,
"02_ASD_GENES" = asd_mouse,
"03_MEF2C_CKO_DEG" = c(mef2c_cko_up, mef2c_cko_dn),
"04_HD4CN_DEG" = c(hd4cn_up, hd4cn_dn))
# myref <- list("01_MEF2C_CKO_DEG" = c(mef2c_cko_up, mef2c_cko_dn),
# "02_HD4CN_DEG" = c(hd4cn_up, hd4cn_dn),
# "03_SSC_GENES" = sscgenes,
# "04_ASD_ALL_GENES" = asd_mouse)
##-------------------------------------------------------
## READ & FILTER DEG TABLES
##-------------------------------------------------------
## READ & FILTER DEG TABLES
filterDEGs <- function(degtable, pvalcutoff, l2fccutoff)
{
deg.cs.sd.temp <- read.table(degtable, sep = "\t", header = TRUE)
deg.cs.sd <- deg.cs.sd.temp
deg.cs.sd.sig.up <- deg.cs.sd[deg.cs.sd$FDR <= pvalcutoff & deg.cs.sd$logFC >= l2fccutoff,]
deg.cs.sd.sig.dn <- deg.cs.sd[deg.cs.sd$FDR <= pvalcutoff & deg.cs.sd$logFC <= -l2fccutoff,]
return(list("UP" = row.names(deg.cs.sd.sig.up), "DOWN" = row.names(deg.cs.sd.sig.dn)))
}
pvalcutoff <- 0.05
l2fccutoff <- 0.1375 # 10% difference
astro_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_Astrocytes_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
endo_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_Endothelial_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl23it_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_EX_L2-3_IT_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl45it_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_EX_L4-5_IT_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl5et_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_EX_L5_ET_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl5it_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_EX_L5_IT_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl56np_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_EX_L5-6_NP_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl6b_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_EX_L6b_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl6ct_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_EX_L6_CT_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
exl6it_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_EX_L6_IT_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
inlamp5_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_IN_Lamp5_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
inpvalb_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_IN_Pvalb_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
insncg_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_IN_Sncg_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
insst_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_IN_Sst_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
invip_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_IN_Vip_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
micro_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_Microglia_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
oligo_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_Oligodendrocytes_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
opc_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_OPC_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
peri_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_Pericytes_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
pvm_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_PVM_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
vlmc_list <- filterDEGs("./../SEURAT_SLEEPSEQ_DATA_VLMC_SD_x_CS_DEG_TABLE_PSEUDOBULK_FULL.txt", pvalcutoff, l2fccutoff)
sleepdeg <-list("01_EX_L23IT_DEG_UP" = exl23it_list$UP, "02_EX_L23IT_DEG_DOWN" = exl23it_list$DOWN,
"03_EX_L45IT_DEG_UP" = exl45it_list$UP, "04_EX_L45IT_DEG_DOWN" = exl45it_list$DOWN,
"05_EX_L5IT_DEG_UP" = exl5it_list$UP, "06_EX_L5IT_DEG_DOWN" = exl5it_list$DOWN,
"07_EX_L6IT_DEG_UP" = exl6it_list$UP, "08_EX_L6IT_DEG_DOWN" = exl6it_list$DOWN,
"09_EX_L5ET_DEG_UP" = exl5et_list$UP, "10_EX_L5ET_DEG_DOWN" = exl5et_list$DOWN,
"11_EX_L56NP_DEG_UP" = exl56np_list$UP, "12_EX_L56NP_DEG_DOWN" = exl56np_list$DOWN,
"13_EX_L6B_DEG_UP" = exl6b_list$UP, "14_EX_L6B_DEG_DOWN" = exl6b_list$DOWN,
"15_EX_L6CT_DEG_UP" = exl6ct_list$UP, "16_EX_L6CT_DEG_DOWN" = exl6ct_list$DOWN,
"17_IN_Lamp5_DEG_UP" = inlamp5_list$UP, "18_IN_Lamp5_DEG_DOWN" = inlamp5_list$DOWN,
"19_IN_Pvalb_DEG_UP" = inpvalb_list$UP, "20_IN_Pvalb_DEG_DOWN" = inpvalb_list$DOWN,
"21_IN_Sncg_DEG_UP" = insncg_list$UP, "22_IN_Sncg_DEG_DOWN" = insncg_list$DOWN,
"23_IN_Sst_DEG_UP" = insst_list$UP, "24_IN_Sst_DEG_DOWN" = insst_list$DOWN,
"25_IN_Vip_DEG_UP" = invip_list$UP, "26_IN_Vip_DEG_DOWN" = invip_list$DOWN)
##-------------------------------------------------------
## Perform Super Exact Test
library(SuperExactTest)
library(ggplot2)
library(ggnewscale)
library(rlist)
library(wesanderson)
setout <- list()
setoutAll <- list()
i <- 0
for(mod in names(sleepdeg))
{
i <- i + 1
newGenes <- list.append(myref, mod = sleepdeg[[mod]])
names(newGenes)[[length(newGenes)]] <- mod
setres <- supertest(newGenes, n = 13578, degree = 2) # 13578 is background number of genes
setresData <- as.data.frame(summary(setres)$Table)
setoutAll[[i]] <- setresData
setresDataSel <- setresData[grep(mod, setresData$Intersections),c(1, 3, 5, 6)]
setout[[i]] <- setresDataSel
print(paste(mod, i, sep = " "))
}
names(setoutAll) <- names(sleepdeg)
names(setout) <- names(sleepdeg)
setoutData <- Reduce("rbind", setout)
save(setout, setoutData, setoutAll, file = "SLEEPSEQ_PSEUDOBULK_DGE_CLASS_SET.RData")
## REORGANIZE DATA FOR PLOTS
setoutDataTemp <- as.data.frame(matrix(unlist(strsplit(setoutData$Intersections, " & ")), byrow = T, ncol = 2))
colnames(setoutDataTemp) <- c("REF_GENES", "CELLTYPE_DEG")
setoutDataTemp$Intersections <- setoutData$Intersections
setoutData2 <- merge(setoutData, setoutDataTemp, by = "Intersections")
setoutData2$NegLog10 <- -log10(setoutData2$P.value + 10^-10)
write.table(setoutData2, "SLEEPSEQ_PSEUDOBULK_DGE_CLASS_SET_FOR_PLOT.txt", row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")
setoutData3 <- setoutData2
setoutData3$NewPval <- setoutData3$P.value
setoutData3$NewPval[setoutData3$NewPval > 0.05] <- 1
setoutData3$NegLog10 <- -log10(setoutData3$NewPval + 10^-10)
##-------------------------------------------------------
## HEATMAP
pAll <- ggplot(data = setoutData3) +
geom_tile(data = setoutData3[setoutData3$REF_GENES == "03_MEF2C_CKO_DEG",], aes(x = CELLTYPE_DEG, y = REF_GENES, fill = NegLog10), color = "white", linewidth = 0.5) +
# scale_fill_viridis_c(option = "turbo") +
# scale_fill_gradientn(colours = c("red3", "gold", "green4")) +
scale_fill_gradientn(colours = c("grey95", "green4", "gold", "red3")) +
theme(panel.background = element_blank(), # bg of the panel
panel.grid.major = element_blank(), # get rid of major grid
panel.grid.minor = element_blank(), # get rid of minor grid
axis.title=element_blank(),
panel.grid=element_blank(),
axis.text.x=element_text(size=10, angle=90, vjust=0.5, hjust=1),
axis.ticks=element_blank(),
axis.text.y=element_text(size=10)) +
NULL
pAll <- pAll +
geom_tile(data = setoutData3[setoutData3$REF_GENES == "04_HD4CN_DEG",], aes(x = CELLTYPE_DEG, y = REF_GENES, fill = NegLog10), color = "white", linewidth = 0.5) +
geom_tile(data = setoutData3[setoutData3$REF_GENES == "02_ASD_GENES",], aes(x = CELLTYPE_DEG, y = REF_GENES, fill = NegLog10), color = "white", linewidth = 0.5) +
geom_tile(data = setoutData3[setoutData3$REF_GENES == "01_SSC_GENES",], aes(x = CELLTYPE_DEG, y = REF_GENES, fill = NegLog10), color = "white", linewidth = 0.5) +
NULL
ggsave(filename = "SLEEPSEQ_PSEUDOBULK_DGE_CLASS_SET_FOR_PLOT.pdf", plot = pAll, width = 22, height = 5, units = "in", dpi = 300)