In [None]:
# Run-level settings.
# `analysis` and `organ` are combined by sfile() into the prefix of every
# output file name; `organ` is also part of the input RDS file name built in
# loadRawSeurat().
analysis <- "Cropseq_all_integrated"
organ <- "murine"
# Not referenced by the code visible in this notebook (functions.R may use it).
cellRangerVersion <- 3

library(Seurat)
library(ggplot2)
library(gridExtra)
library(stringi)
library(grid)
library(gridExtra)

# Project directory layout, resolved relative to the current working directory.
root <- getwd()
data <- file.path(root, "data")
raw <- file.path(data, "raw")
RData <- file.path(data, "RData")
processed <- file.path(data, "processed")
# NOTE: `pdf` here is a directory path that shares its name with the pdf()
# graphics device. Later calls such as pdf(sfile(pdf, ...)) use both: the call
# resolves to the function, the argument to this path.
pdf <- file.path(processed, "pdf")
csv <- file.path(processed, "csv")
interim <- file.path(data, "interim")
external <- file.path(data, "external")
notebooks <- file.path(root, "notebook")
# Load helper definitions from notebook/functions.R (contents not shown here).
source(file.path(notebooks, "functions.R"))

# Build the path of an output file located in `path`.
#
# The file name is "<analysis>_<organ>__<name>", where `analysis` and `organ`
# are the global run settings.
#
# path: directory the file lives in.
# name: file name suffix (including extension).
# Returns the combined path as a character vector.
sfile <- function(path, name) {
    prefixed_name <- paste0(analysis, "_", organ, "__", name)
    file.path(path, prefixed_name)
}

In [None]:
# Load the raw Seurat object of one Crop-seq project from the RData directory.
#
# The file read is "<RData>/Cropseq_<projectName>_<organ>__raw_seuratObject.RDS",
# where `RData` and `organ` are global settings.
#
# projectName: project identifier, e.g. "19_5".
# Returns the object stored in the RDS file.
# Raises an error naming the missing file when it does not exist.
loadRawSeurat <- function(projectName) {
    rds_filename <- file.path(RData,
                              paste0("Cropseq_",
                                     projectName,
                                     "_",
                                     organ,
                                     "__",
                                     "raw_seuratObject.RDS"))

    # Pass the text to stop() directly: wrapping it in message() printed the
    # text but raised an error whose own message was empty.
    if (!file.exists(rds_filename)) {
        stop("RDS file ", rds_filename, " does not exist!", call. = FALSE)
    }

    obj <- readRDS(rds_filename)
    message("Loading Seurat object '", projectName, "' (", nrow(obj@meta.data), " cells)")
    obj
}

In [None]:
# Load the raw Seurat object of each of the five Crop-seq projects.
# loadRawSeurat() stops with an error if a project's RDS file is missing.
seu.singlets.19_5 <- loadRawSeurat("19_5")
seu.singlets.19_9_1 <- loadRawSeurat("19_9_1")
seu.singlets.19_9_2 <- loadRawSeurat("19_9_2")
seu.singlets.19_23_1 <- loadRawSeurat("19_23_1")
seu.singlets.19_23_2 <- loadRawSeurat("19_23_2")

In [None]:
# Total number of columns (cells) summed over the five loaded objects.
# NOTE(review): despite the name, nothing here restricts the count to control
# cells, and the value is not used by the code visible in this notebook.
totalControls <- sum(ncol(seu.singlets.19_5),
                     ncol(seu.singlets.19_9_1),
                     ncol(seu.singlets.19_9_2),
                     ncol(seu.singlets.19_23_1),
                     ncol(seu.singlets.19_23_2))

In [None]:
# Collect the five objects in one list; this list is normalised per object
# below and then passed to FindIntegrationAnchors().
ob.list <- list(seu.singlets.19_5,
                seu.singlets.19_9_1,
                seu.singlets.19_9_2,
                seu.singlets.19_23_1,
                seu.singlets.19_23_2
               )

In [None]:
# Normalise each object and select its 2000 most variable features ("vst")
# independently, before integration.
# lapply() replaces the `1:length(ob.list)` index loop, which would iterate
# over c(1, 0) if the list were empty.
ob.list <- lapply(ob.list, function(seuObj) {
    seuObj <- NormalizeData(seuObj,
                            verbose = TRUE)

    FindVariableFeatures(seuObj,
                         selection.method = "vst",
                         nfeatures = 2000,
                         verbose = TRUE)
})

In [None]:
# Find integration anchors across the objects in ob.list, using dimensions 1-30.
seu.anchors <- FindIntegrationAnchors(object.list = ob.list, dims = 1:30)

In [None]:
# Integrate the datasets from the anchor set (dimensions 1-30); the result is
# the object every later cell works on.
seu.singlets.integrated <- IntegrateData(anchorset = seu.anchors, dims = 1:30)

In [None]:
# Make "integrated" the default assay for the scaling, PCA and UMAP steps below.
DefaultAssay(object = seu.singlets.integrated) <- "integrated"

In [None]:
# Scale the data of the current default assay (set to "integrated" above).
seu.singlets.integrated <- ScaleData(object = seu.singlets.integrated, verbose = FALSE)

In [None]:
# PCA with 30 components; these feed the UMAP and neighbour graph below.
seu.singlets.integrated <- RunPCA(object = seu.singlets.integrated, npcs = 30, verbose = FALSE)

In [None]:
# UMAP embedding computed from PCA dimensions 1-30. It is computed once, on all
# cells; the later G1-only subset reuses these coordinates.
seu.singlets.integrated <- RunUMAP(object = seu.singlets.integrated,
                                   reduction = "pca", 
                                   dims = 1:30, verbose = FALSE)

In [None]:
# Cell-cycle scoring.
# The gene list is read from an external text file, one entry per line.
# Entries 1-43 are used as S-phase genes and entries 44-97 as G2/M genes, so
# the file is assumed to hold at least 97 lines in that order - TODO confirm.
# Symbols are converted to title case, presumably to match the capitalisation
# of mouse gene symbols - verify against the object's feature names.
cc.genes <- readLines(con = file.path(external, "regev_lab_cell_cycle_genes.txt"))
s.genes <- stri_trans_totitle(cc.genes[1:43])
g2m.genes <- stri_trans_totitle(cc.genes[44:97])

# NOTE(review): this runs while the default assay is still "integrated" (set
# above); confirm that scoring on that assay is intended.
# The "Phase" metadata column produced here is used by the cells below.
seu.singlets.integrated <- CellCycleScoring(object = seu.singlets.integrated,
                                 s.features = s.genes,
                                 g2m.features = g2m.genes,
                                 set.ident = TRUE)

In [None]:
# Count cells per cell-cycle phase within each gRNA group and plot the counts
# as relative (p1) and absolute (p2) stacked bars.
phase_meta <- seu.singlets.integrated@meta.data

# A single cross-tabulation replaces the per-group loop. It no longer assumes
# that the control group has exactly three phases, and it does not grow the
# data frame with rbind(). The column names (Var1, Freq, group) are unchanged.
Freq_phase_data <- as.data.frame(table(Var1 = phase_meta$Phase,
                                       group = phase_meta$gRNA_group))
# Drop phase/group combinations without cells, which the cross-tabulation
# would otherwise report as zero counts.
Freq_phase_data <- Freq_phase_data[Freq_phase_data$Freq > 0,
                                   c("Var1", "Freq", "group")]
Freq_phase_data$group <- as.character(Freq_phase_data$group)
rownames(Freq_phase_data) <- NULL

phase_bar_theme <- theme(axis.text.x = element_text(angle = 90, hjust = 1),
                         axis.text.y = element_text(size = 4))

# Relative composition per group. The fill is the phase, so the legend is
# titled "Phase" (it was mislabelled "Cluster").
p1 <- ggplot(Freq_phase_data, aes(group, Freq, fill = Var1)) +
    geom_bar(position = position_fill(reverse = TRUE), stat = "identity", color = "black") +
    labs(x = "", y = "", fill = "Phase") +
    phase_bar_theme

# Absolute cell counts per group.
p2 <- ggplot(Freq_phase_data, aes(group, Freq, fill = Var1)) +
    geom_bar(position = position_stack(reverse = TRUE), stat = "identity", color = "black") +
    labs(x = "", y = "", fill = "Phase") +
    phase_bar_theme

p2 + p1

# grid.arrange() draws on the open device itself, so no print() is needed.
pdf(sfile(pdf, "Ratio_gRNA_groups_Phase.pdf"))
grid.arrange(p2, p1, ncol = 1)
dev.off()

In [None]:
# Keep only the cells whose assigned cell-cycle phase is "G1".
# All downstream steps (clustering, signatures, UMAP medians) use this subset.
seu.singlets.integrated <- seu.singlets.integrated[,seu.singlets.integrated@meta.data$Phase == "G1"]

In [None]:
# Cluster analysis on the G1 subset: neighbour graph on dimensions 1-10, then
# clustering at resolution 0.5. No UMAP is computed here; the plots below
# reuse the embedding computed before the G1 filter.
# NOTE(review): dims = 1:10 here differs from the 1:30 used for the UMAP -
# confirm this is intended.
seu.singlets.integrated <- FindNeighbors(seu.singlets.integrated, dims = 1:10)
seu.singlets.integrated <- FindClusters(seu.singlets.integrated, resolution = 0.5)

In [None]:
# UMAP of the clustered G1 subset, using DimPlot's default grouping.
plot1 <- DimPlot(seu.singlets.integrated, reduction = "umap")

# Print explicitly: a bare `plot1` is only drawn when the top-level value is
# auto-printed, which does not happen when this code is source()d or wrapped
# in a function, leaving an empty PDF.
pdf(sfile(pdf, "ClusterAnalysis_integrated_batchCorrect_UMAP_plot.pdf"))
print(plot1)
dev.off()

# Show the plot in the notebook as well.
plot1

In [None]:
# Marker gene sets; each is turned into a per-cell signature score with
# AddModuleScore() below.
Th17_genes <- c("Rorc", "Il17a", "Il17f", "Il23r", "Il6ra", "Il22", "Ccr6")

Th1_genes <- c("Tbx21", "Ifng", "Ccr5", "Ccl5", "Cxcr3", "Il12rb1")

Treg_genes <- c("Lag3", "Havcr2", "Pdcd1", "Ctla4", "Itga1",
                "Foxp3", "Maf", "Irf1", "Batf",
                "Il12rb2", "Tnfrsf18", "Nfil3", "Ahr", "Rora","Il2ra", "Icos")

In [None]:
# Switch the default assay from "integrated" to "RNA" before the signature
# scoring and violin plots below.
DefaultAssay(seu.singlets.integrated)<-"RNA"

In [None]:
# Score every cell for the Th17, Th1 and Treg signatures.
# Each call adds one metadata column named "<name>1" (Th17_Sig1, Th1_Sig1,
# Treg_Sig1), which the violin plots below read.
signature_sets <- list(Th17_Sig = Th17_genes,
                       Th1_Sig = Th1_genes,
                       Treg_Sig = Treg_genes)

for (signature_name in names(signature_sets)) {
    seu.singlets.integrated <- AddModuleScore(
        seu.singlets.integrated,
        features = list(signature_sets[[signature_name]]),
        name = signature_name,
        ctrl = 80)
}

In [None]:
# Violin plot of one signature score per identity class, without points or
# legend, with the median marked by a wide horizontal dash (shape 95).
#
# seuObj:  Seurat object holding the score in its metadata.
# feature: name of the metadata column to plot, e.g. "Th17_Sig1".
# Returns the plot object.
signatureViolin <- function(seuObj, feature) {
    VlnPlot(seuObj, features = feature, sort = FALSE, pt.size = 0) +
        stat_summary(fun = median, geom = "point", size = 25, colour = "black", shape = 95) +
        NoLegend()
}

plot1 <- signatureViolin(seu.singlets.integrated, "Th17_Sig1")
plot2 <- signatureViolin(seu.singlets.integrated, "Th1_Sig1")
plot3 <- signatureViolin(seu.singlets.integrated, "Treg_Sig1")

plot1
plot2
plot3

# Print explicitly so the PDF is written even when this code is source()d;
# a bare expression is only drawn when the top-level value is auto-printed.
# NOTE(review): the page is 6 x 18 in (portrait); confirm that the layout
# produced by `plot1 + plot2 + plot3` is the intended one for that page.
pdf(sfile(pdf, "Signature_scores_integrated_batchCorrect.pdf"), width = 6, height = 18)
print(plot1 + plot2 + plot3)
dev.off()

In [None]:
# Exclude outlier gRNA (Cropseq_19_9_1 in gRNA_IL-23R_3_gene).
# Despite its name, `cells.to.use` holds the barcodes to be REMOVED; it is
# passed to subset(..., invert = TRUE) in the next cell.
exclusion_meta <- seu.singlets.integrated@meta.data

# which() drops NA comparisons. Indexing the data frame with a logical vector
# containing NA would create all-NA rows whose row names ("NA", "NA.1", ...)
# are not real barcodes.
cells.to.use <- rownames(exclusion_meta)[which(
    exclusion_meta$gRNA == "gRNA_IL-23R_3_gene" &
        exclusion_meta$orig.ident == "Cropseq_19_9_1")]

In [None]:
# Remove the cells listed in `cells.to.use` (invert = TRUE keeps all others).
# Skip the call when nothing matched instead of relying on how subset()
# treats an empty `cells` vector.
if (length(cells.to.use) > 0) {
    seu.singlets.integrated <- subset(seu.singlets.integrated, cells = cells.to.use, invert = TRUE)
} else {
    message("No cells matched the exclusion criteria; object left unchanged")
}

In [None]:
# Barcodes of control cells in the clusters used as reference subsets:
# clusters 1 and 5 for Th17, cluster 3 for Th1, clusters 0 and 6 for Treg.
# NOTE(review): the cluster numbers are hard-coded and depend on the
# clustering result above - re-check them whenever clustering is re-run.
integrated_meta <- seu.singlets.integrated@meta.data
is_control_cell <- integrated_meta$gRNA_group == "control"
cluster_of_cell <- integrated_meta$seurat_clusters

Th17_barcodes <- rownames(integrated_meta[cluster_of_cell %in% c(1, 5) & is_control_cell, ])
message("Found ", length(Th17_barcodes), " cells in Th17 control subset")

Th1_barcodes <- rownames(integrated_meta[cluster_of_cell %in% c(3) & is_control_cell, ])
message("Found ", length(Th1_barcodes), " cells in Th1 control subset")

Treg_barcodes <- rownames(integrated_meta[cluster_of_cell %in% c(0, 6) & is_control_cell, ])
message("Found ", length(Treg_barcodes), " cells in Treg control subset")

In [None]:
# gRNA groups present in the filtered, integrated object.
# The original read `seu.singlets`, which is never defined in this notebook.
# Reading the integrated object also guarantees that every listed group still
# has cells after the G1 filter and the outlier exclusion.
groups <- unique(as.character(seu.singlets.integrated@meta.data$gRNA_group))
# Drop NA so the `groups[g] != "control"` test in the loop below cannot fail.
groups <- groups[!is.na(groups)]

In [None]:
# Median UMAP position of a set of cells.
#
# seuObj: Seurat object with a "umap" reduction.
# cells:  barcodes of the cells to summarise.
# group:  label stored in the `group` column of the result.
# Returns a one-row data frame with columns group, umap1, umap2 (medians of
# the first and second embedding column) and counts (number of cells).
get_UMAP_Median_by_cells <- function(seuObj, cells, group) {
    seuSubset <- subset(seuObj, cells = cells)
    umapCoords <- Embeddings(seuSubset, reduction = "umap")

    # Take the columns by position: their names depend on the reduction key
    # ("UMAP_1" in older Seurat releases, "umap_1" in newer ones), and looking
    # up a missing name with `$` silently yields NULL.
    data.frame(group = group,
               umap1 = median(umapCoords[, 1]),
               umap2 = median(umapCoords[, 2]),
               counts = nrow(seuSubset@meta.data))
}

In [None]:
# Build the segments that connect the overall control median to the median of
# each control subset (rows whose group ends in "_control").
#
# umapMedians: data frame with columns group, umap1 and umap2.
# Returns a data frame with one row per control subset and columns
#   x1, y1 - coordinates of the "control" row,
#   x2, y2 - coordinates of the subset row.
# When there is no control subset, the result has zero rows but still carries
# all four columns, so it can be used directly as plot data.
# Raises an error when control subsets exist but there is not exactly one
# "control" row.
getControlGroupLines <- function(umapMedians) {
    groupNames <- as.character(umapMedians$group)

    targets <- umapMedians[grepl("_control$", groupNames), , drop = FALSE]
    if (nrow(targets) == 0) {
        return(data.frame(x1 = numeric(0), y1 = numeric(0),
                          x2 = numeric(0), y2 = numeric(0)))
    }

    # which() ignores NA group labels instead of producing an all-NA row.
    centerControl <- umapMedians[which(groupNames == "control"), , drop = FALSE]
    if (nrow(centerControl) != 1) {
        stop("Expected exactly one row with group == \"control\", found ",
             nrow(centerControl), call. = FALSE)
    }

    # Built in one step rather than grown with rbind() inside a loop.
    data.frame(x1 = rep(centerControl$umap1, nrow(targets)),
               y1 = rep(centerControl$umap2, nrow(targets)),
               x2 = targets$umap1,
               y2 = targets$umap2)
}

In [None]:
# Barcodes of all cells assigned to the given cluster(s).
#
# seuObj:  object whose meta.data has a `seurat_clusters` column and cell
#          barcodes as row names.
# cluster: one or more cluster ids.
# Returns a character vector of barcodes (empty when nothing matches) and
# reports the number of cells found via message().
getClusterBarcodes <- function(seuObj, cluster) {
    inCluster <- seuObj@meta.data$seurat_clusters %in% cluster
    # Index the row names directly so the result does not depend on the
    # metadata keeping its data-frame shape after row subsetting.
    barcodes <- rownames(seuObj@meta.data)[inCluster]
    # Collapse the ids so that several clusters give one readable message.
    message("Found ", length(barcodes), " cells in cluster ",
            paste(cluster, collapse = ", "))
    barcodes
}

In [None]:
# Median UMAP position of every gRNA group, of the three control subsets and
# of all control cells. Row order: non-control groups (in the order of
# `groups`), Th17_control, Th1_control, Treg_control, control.
umap_meta <- seu.singlets.integrated@meta.data

# Barcodes of all cells in one gRNA group; which() skips cells without a group.
cellsOfGroup <- function(label) {
    rownames(umap_meta)[which(umap_meta$gRNA_group == label)]
}

perturbed_groups <- as.character(groups)
perturbed_groups <- perturbed_groups[!is.na(perturbed_groups) &
                                         perturbed_groups != "control"]

median_labels <- c(perturbed_groups,
                   "Th17_control", "Th1_control", "Treg_control", "control")
median_cells <- c(lapply(perturbed_groups, cellsOfGroup),
                  list(Th17_barcodes,
                       Th1_barcodes,
                       Treg_barcodes,
                       cellsOfGroup("control")))

# Skip empty cell sets: subsetting the object to zero cells would abort the
# whole cell.
has_cells <- lengths(median_cells) > 0
if (any(!has_cells)) {
    message("Skipping groups without cells: ",
            paste(median_labels[!has_cells], collapse = ", "))
}

# Build all rows first and bind them once instead of growing the data frame
# with rbind() inside a `1:length(groups)` loop.
UMAP_Medians <- do.call(rbind, Map(function(label, cells) {
    get_UMAP_Median_by_cells(seuObj = seu.singlets.integrated,
                             cells = cells,
                             group = label)
}, median_labels[has_cells], median_cells[has_cells]))
rownames(UMAP_Medians) <- NULL



library(ggrepel)

# Theme settings kept for reuse; the plot below does not apply them.
ggtheme <- theme(plot.title = element_text(face = 'bold',size = 18, hjust = 0.5),
    axis.title = element_text(size=8),
    axis.text = element_text(size=8),
    legend.position="none")

# Arrows from the overall control median to each control subset median.
control_lines <- getControlGroupLines(UMAP_Medians)

# One point per group, sized by cell count: gRNA groups in black, control
# subsets ("*_control") in blue, the overall control in red.
p1 <- ggplot(UMAP_Medians, aes(umap1, umap2)) +
    geom_segment(data = control_lines,
                 aes(x = x1, y = y1, xend = x2, yend = y2),
                 color = "#cccccc",
                 lineend = "round", linejoin = "round",
                 arrow = arrow(length = unit(0.1, "inches")),
                 size = 1) +
    geom_point(aes(size = counts), color = "black") +
    geom_point(data = subset(UMAP_Medians, grepl("_control$", group)),
               color = "blue",
               aes(umap1, umap2, size = counts)) +
    geom_point(data = subset(UMAP_Medians, grepl("^control$", group)),
               color = "red",
               aes(umap1, umap2, size = counts)) +
    labs(x = "UMAP_1", y = "UMAP_2", title = "UMAP Medians per gRNA group") +
    geom_label_repel(aes(label = group), size = 2.5, alpha = .7, max.overlaps = 15) +
    theme_classic() + theme(aspect.ratio = 1) + coord_fixed()

p1

# Print explicitly so the PDF is written even when this code is source()d;
# a bare `p1` is only drawn when the top-level value is auto-printed.
pdf(sfile(pdf, "UMAP_median_bygRNA_group.pdf"), useDingbats = FALSE, width = 6, height = 6)
print(p1)
dev.off()

In [None]:
# Save the filtered, clustered and scored integrated object to the RData
# directory as "<analysis>_<organ>__gRNA_integrated_seuratObject.RDS".
saveRDS(seu.singlets.integrated, sfile(RData, "gRNA_integrated_seuratObject.RDS"))

In [None]:
# Seurat object is then converted to h5ad object using SeuratDisk
# See https://mojaveazure.github.io/seurat-disk/articles/convert-anndata.html