In [None]:
# Run-level settings.
# `analysis` and `organ` are combined by sfile() into the prefix of every output file name.
analysis <- "Cropseq_19_9_2"
# Sub-directory of the raw data folder that is handed to load.10x().
projectPath <- "19_9_2"
organ <- "murine"
# Passed to load.10x() as `cellranger.version`.
cellRangerVersion <- 3

library(Seurat)
library(ggplot2)
library(gridExtra)
library(stringi)
library(grid)
library(gridExtra)

# Directory layout; every path is derived from the working directory at start-up.
root <- getwd()
data <- file.path(root, "data")
raw <- file.path(data, "raw")
RData <- file.path(data, "RData")
processed <- file.path(data, "processed")
# NOTE: `pdf` and `csv` are plain path strings. A call such as pdf(...) still
# reaches the graphics device function, because R skips non-function objects
# when it resolves the name in a function call.
pdf <- file.path(processed, "pdf")
csv <- file.path(processed, "csv")
interim <- file.path(data, "interim")
external <- file.path(data, "external")
notebooks <- file.path(root, "notebook")
# Project helper functions; load.10x() used further down is presumably defined
# there (TODO confirm).
source(file.path(notebooks, "functions.R"))

# Build the path of an output file located in `path`.
# The file name is "<analysis>_<organ>__<name>", where `analysis` and `organ`
# are the global run settings.
sfile <- function(path, name){
    prefixed_name <- paste0(analysis, "_", organ, "__", name)
    file.path(path, prefixed_name)
}

In [None]:
# Summarise which gRNAs were detected in each cell.
#
# matrix: count matrix with gRNAs in rows and cells in columns; its row and
#         column names are used as gRNA and cell identifiers.
# Returns a data.frame with one row per column of `matrix`:
#   cell_barcode - column name of the cell
#   gRNA         - names of all gRNAs with a count >= 1, joined by "|";
#                  NA_character_ when no gRNA was detected
#   ngRNA        - number of gRNAs with a count >= 1
convertTo_cellgRNA_table <- function(matrix){
    gRNA_names <- rownames(matrix)
    # Apply the detection threshold once instead of once per cell.
    detected <- matrix >= 1
    cell_gRNA_tab <- data.frame(cell_barcode = colnames(matrix), stringsAsFactors = FALSE)
    # Columns are addressed by position, so repeated barcodes cannot pick the
    # wrong column. vapply() keeps `gRNA` a character vector even when there are
    # no cells or when no cell carries a gRNA.
    cell_gRNA_tab$gRNA <- vapply(seq_len(ncol(detected)), function(j){
        hits <- gRNA_names[detected[, j]]
        if (length(hits) == 0) {
            NA_character_
        } else {
            paste0(hits, collapse = "|")
        }
    }, character(1))
    cell_gRNA_tab$ngRNA <- colSums(detected)
    return(cell_gRNA_tab)
}

In [None]:
# Library-size normalise a count matrix and log-transform it.
#
# matrix:  count matrix with genes in rows and cells in columns.
# scaling: factor the per-cell fractions are multiplied by before the log.
# Returns log2(1 + count / column total * scaling), with the dimensions and
# dimnames of `matrix`. Columns whose total is 0 yield 0 instead of NaN.
normalize_expression <- function(matrix, scaling=10000){
    lib_size <- colSums(matrix)
    # Avoid 0/0 for empty cells: with non-negative counts such a column is all
    # zeros, so dividing by 1 gives log2(1) = 0.
    lib_size[lib_size == 0] <- 1
    # t(t(m) / v) divides every column of m by the matching element of v.
    norm_matrix <- log2(1 + t(t(matrix)/lib_size) * scaling)
    return(norm_matrix)
}

In [None]:
# Load 10X data from raw data
# load.10x() is not defined in this file; as used here, its result is a list with
# the elements `matrix.mtx`, `features.tsv` and `barcodes.tsv`.
files_10x <- load.10x(file.path(raw, projectPath), cellranger.version = cellRangerVersion)
# Name the rows after column V2 of features.tsv; make.unique() disambiguates
# repeated names by appending a numeric suffix.
rownames(files_10x$matrix.mtx) <-  make.unique(as.vector(files_10x$features.tsv$V2))
# Name the columns after column V1 of barcodes.tsv.
colnames(files_10x$matrix.mtx) <- files_10x$barcodes.tsv$V1

In [None]:
# Read the tab-separated annotation table that was used for the alignment
annotation <- read.delim("TH17_Signaturelibrary.csv")

In [None]:
# Row names under which the annotated gRNAs appear in the count matrix
gRNA_rows <- paste0("gRNA_", annotation$oligo_name, "_gene")

# Matrix of gRNA counts (gRNAs x cells).
# drop = FALSE keeps the gRNA x cell orientation even with a single gRNA row.
matrix_gRNA <- as.matrix(files_10x$matrix.mtx[gRNA_rows, , drop = FALSE])

# Matrix of gene expression with no gRNA
matrix_regular <- as.matrix(
    files_10x$matrix.mtx[!(rownames(files_10x$matrix.mtx) %in% gRNA_rows), , drop = FALSE]
)

# Matrix of all gene expression
matrix_all <- as.matrix(files_10x$matrix.mtx)

# Cells in which exactly one gRNA has a count >= 1.
# drop = FALSE keeps a matrix even when only a single cell qualifies.
matrix_gRNA_singlets <- matrix_gRNA[, colSums(matrix_gRNA >= 1) == 1, drop = FALSE]

# Save data
saveRDS(matrix_gRNA, sfile(RData, "matrix_gRNA.RDS"))
saveRDS(matrix_regular, sfile(RData, "matrix_regular.RDS"))
saveRDS(matrix_gRNA_singlets, sfile(RData, "matrix_gRNA_singlets.RDS"))

In [None]:
# Read the gene expression matrix back from its RDS file
matrix_regular <- readRDS(sfile(RData, "matrix_regular.RDS"))

# Total count per gene over all cells, keyed by gene name
totalGeneCounts <- data.frame(count = rowSums(matrix_regular))
totalGeneCounts$gene <- rownames(totalGeneCounts)

# Export only genes with a total count above zero
detected_genes <- totalGeneCounts$count > 0
totalGeneCounts <- totalGeneCounts[detected_genes,]
write.csv(totalGeneCounts, sfile(csv, "total_gene_count.csv"))

In [None]:
# gRNA assignment of every cell, plus its coverage of the regular genes:
# nGene = number of genes with a count >= 1, nUMI = total count.
# drop = FALSE keeps the subset a matrix even for a single cell.
cell_gRNA_tab <- convertTo_cellgRNA_table(matrix_gRNA)
cell_gRNA_tab$nGene <- colSums(matrix_regular[, cell_gRNA_tab$cell_barcode, drop = FALSE] >= 1)
cell_gRNA_tab$nUMI <- colSums(matrix_regular[, cell_gRNA_tab$cell_barcode, drop = FALSE])

# Same table restricted to cells with exactly one gRNA
cell_gRNA_singlets_tab <- convertTo_cellgRNA_table(matrix_gRNA_singlets)

# Annotation group of each singlet gRNA. The gRNA rows are named
# "gRNA_<oligo_name>_gene", so that name is rebuilt for every annotation entry
# and match() returns the annotation row directly. A gRNA without an annotation
# entry gets NA, and the result always holds exactly one value per cell.
cell_gRNA_singlets_tab$gRNA_group <- annotation[
    match(cell_gRNA_singlets_tab$gRNA, paste0("gRNA_", annotation$oligo_name, "_gene")),
    "group"
]

write.csv(cell_gRNA_tab, file=sfile(csv, "cell_gRNA_tab.csv"))
write.csv(cell_gRNA_singlets_tab, file=sfile(csv, "cell_gRNA_singlets_tab.csv"))

In [None]:
# Expression of the regular genes, restricted to the cells with exactly one gRNA.
# drop = FALSE keeps the result a matrix even if only one cell is selected.
matrix_regular_gRNASinglets <- matrix_regular[, cell_gRNA_singlets_tab$cell_barcode, drop = FALSE]
saveRDS(matrix_regular_gRNASinglets, file=sfile(RData, "matrix_regular_gRNASinglets.RDS"))

In [None]:
# Share of cells with exactly one / several / no assigned gRNA among the cells
# that pass increasing thresholds on the number of detected genes.
thresholds <- c(125, 250, 500, 1000, 2000, 3000, 4000, 5000)

# One vectorised count per threshold instead of growing vectors in a loop.
# Counting rows equals counting barcodes as long as cell barcodes are unique.
# A threshold that no cell passes yields NaN (0 / 0) for both fractions.
ncells <- vapply(thresholds, function(th){
    sum(cell_gRNA_tab$nGene >= th)
}, integer(1))
percent_unique_gRNA <- vapply(thresholds, function(th){
    sum(cell_gRNA_tab$ngRNA == 1 & cell_gRNA_tab$nGene >= th)
}, integer(1)) / ncells
percent_multiple_gRNA <- vapply(thresholds, function(th){
    sum(cell_gRNA_tab$ngRNA > 1 & cell_gRNA_tab$nGene >= th)
}, integer(1)) / ncells
percent_not_assigned_gRNA <- 1 - (percent_unique_gRNA + percent_multiple_gRNA)

library(reshape2)
# Long format for plotting: one row per threshold and category.
# The id column is named through `id.vars`, the argument melt() actually has.
quality_genes_gRNA_melt <- melt(data.frame(threshold = factor(thresholds, levels=thresholds),
           singlets = percent_unique_gRNA,
           impurities = percent_multiple_gRNA,
           not_assigned  = percent_not_assigned_gRNA), id.vars="threshold")
# Fix the stacking / legend order of the categories
quality_genes_gRNA_melt$variable <- factor(quality_genes_gRNA_melt$variable,
                                           levels=c("not_assigned", "singlets", "impurities"))

# Plotting
# print() is called explicitly so the plot is drawn to the PDF device even when
# this code is not evaluated at top level (e.g. via source()).
pdf(sfile(pdf, "nGenes_gRNA_Assigned.pdf"))
print(
    ggplot(quality_genes_gRNA_melt, aes(x=threshold, y=value*100, fill=variable)) +
    geom_bar(stat="identity") +
    scale_fill_manual(values = c("darkblue", "peru", "darkgreen")) +
    labs(y="Percent of all cells", x="Genes covered per cell", fill="metric") +
    scale_y_continuous(limits=c(0, 100)) +
    theme(legend.position = c(.7,.85))
)
dev.off()

In [None]:
# Index the table by barcode so singlet information can be filled in by name
rownames(cell_gRNA_tab) <- cell_gRNA_tab$cell_barcode

# Cells that are not gRNA singlets keep NA in both new columns
cell_gRNA_tab$gRNA_group <- NA
cell_gRNA_tab$gRNA_version <- NA

cell_gRNA_tab[cell_gRNA_singlets_tab$cell_barcode, "gRNA_group"] <-
    as.vector(cell_gRNA_singlets_tab$gRNA_group)

# gRNA_version is the second "_"-separated field of the gRNA name
cell_gRNA_tab[cell_gRNA_singlets_tab$cell_barcode, "gRNA_version"] <- sapply(
    strsplit(cell_gRNA_tab[cell_gRNA_singlets_tab$cell_barcode, "gRNA"], "_"),
    function(fields) fields[2]
)

In [None]:
# Histogram of the number of gRNAs detected per cell
plot <- ggplot(cell_gRNA_tab, aes(x=ngRNA)) +
    scale_x_continuous(breaks=0:10) +
    geom_histogram(alpha=.7, binwidth = 1, fill="steelblue") + 
    labs(x="Number of gRNA in cell", y="Number of Cells", title="gRNA assigned per cell") + 
    theme_classic() + 
    theme(plot.title = element_text(hjust = 0.5,size=14, color = "black"))

# Show the plot in the notebook
plot

# Write the same plot to a PDF. print() is called explicitly so the plot is
# drawn to the PDF device even when this code is not evaluated at top level
# (e.g. via source()), where auto-printing does not happen.
pdf(sfile(pdf, "gRNA_assigned_perCell.pdf"), useDingbats = FALSE, width=10, height= 10)
print(plot)
dev.off()

In [None]:
# Export assignment distribution: number of cells carrying 0, 1, ..., 20 gRNAs
gRNA_denstity <- data.frame(
    gRNAs = 0:20,
    counts = vapply(0:20, function(k) sum(cell_gRNA_tab$ngRNA == k), integer(1))
)
write.csv(gRNA_denstity, file=sfile(csv, "assiged_gRNAs.csv"))

In [None]:
# Number of cells without, with exactly one, and with more than one detected gRNA
assignDistribution <- data.frame(
    groups = c("no gRNA", "single gRNA", "multiple gRNA"),
    counts = c(
        sum(cell_gRNA_tab$ngRNA == 0),
        sum(cell_gRNA_tab$ngRNA == 1),
        sum(cell_gRNA_tab$ngRNA > 1)
    )
)

In [None]:
# Plot assignment piechart
# Format fractions as percentage labels rounded to one decimal place,
# e.g. 0.1234 becomes "12.3%".
percent <- function(x) {
    rounded <- round(100 * x, digits = 1)
    paste0(rounded, "%")
}

# Pie chart of the gRNA assignment categories: a single stacked bar drawn in
# polar coordinates, each slice labelled with its share of all cells.
plot <- ggplot(assignDistribution, aes(x="", y=counts, fill=groups))+
    geom_bar(width = 1, stat = "identity") + coord_polar("y", start=0) + theme_minimal()+
    labs(x = NULL, y = NULL, fill = NULL, title = "19-9-2") +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          panel.border = element_blank(),
          plot.title = element_text(hjust = 0.5,size=20, color = "black"),
          panel.grid=element_blank(),
          axis.ticks = element_blank()) +
    theme(axis.text.x=element_blank()) +
    # Labels are placed in the middle of each stacked segment
    geom_text(aes(label = percent(counts/sum(counts))),
              position = position_stack(vjust = 0.5),
              size=5) +
    scale_fill_manual(values=c("#9ecae2", "#deebf7", "#3281bd"))

# Show the plot in the notebook
plot

# Write the same plot to a PDF. print() is called explicitly so the plot is
# drawn to the PDF device even when this code is not evaluated at top level
# (e.g. via source()), where auto-printing does not happen.
pdf(sfile(pdf, "gRNA_assigned_distribution_PieChart.pdf"), useDingbats = FALSE, width=7, height= 7)
print(plot)
dev.off()

In [None]:
# Build the Seurat object from the expression matrix of the gRNA-singlet cells,
# using the analysis name as project name.
seu.singlets <- CreateSeuratObject(counts = matrix_regular_gRNASinglets, project = analysis)
# Attach the gRNA group of each cell to the metadata, addressing rows by barcode.
# NOTE(review): the following cell resets `gRNA_group` to NA and fills it again,
# so this assignment looks redundant - confirm before removing.
seu.singlets@meta.data[cell_gRNA_singlets_tab$cell_barcode, "gRNA_group"] <- cell_gRNA_singlets_tab$gRNA_group

In [None]:
# Add gRNA information to the Seurat object.
# Each column is first initialised with NA and then filled for the singlet
# cells, addressing the metadata rows by cell barcode.
seu.singlets@meta.data$gRNA <- NA
seu.singlets@meta.data[cell_gRNA_singlets_tab$cell_barcode, "gRNA"] <-
    cell_gRNA_singlets_tab$gRNA
seu.singlets@meta.data$gRNA_group <- NA
# as.vector() turns a factor column into plain character values before storing
seu.singlets@meta.data[cell_gRNA_singlets_tab$cell_barcode, "gRNA_group"] <-
    as.vector(cell_gRNA_singlets_tab$gRNA_group)

In [None]:
# Calculating the fraction of counts that comes from mitochondrial genes.
# The pattern is anchored with "^" so only gene names that start with "mt-" are
# selected, not names that merely contain "mt-" somewhere.
mt.genes.idx <- grep('^mt-', rownames(seu.singlets))
# Per cell: summed values of the mitochondrial genes divided by the summed
# values of all genes (a fraction between 0 and 1, not a percentage).
mt.genes.per <- Matrix::colSums(seu.singlets[mt.genes.idx,])/Matrix::colSums(seu.singlets)
seu.singlets <- AddMetaData(seu.singlets, metadata=mt.genes.per, col.name='mito.percent')

In [None]:
# Isolate cells: keep cells with more than 500 and fewer than 4000 detected
# features and a mitochondrial fraction below 0.1
seu.singlets <- subset(seu.singlets, subset = nFeature_RNA > 500 & nFeature_RNA < 4000 & mito.percent < 0.1)

# Remove MITO genes
# Negative indexing with an empty index vector selects nothing, which would
# drop every gene; only remove rows when mitochondrial genes were found.
if (length(mt.genes.idx) > 0) {
    seu.singlets <- seu.singlets[-mt.genes.idx, ]
}

In [None]:
# Number of cells per gRNA (columns Var1 = gRNA, Freq = cell count)
freq_gRNA <- data.frame(table(seu.singlets@meta.data$gRNA))

# Barcodes of the cells whose gRNA is found in at least 10 cells
keep_cells <- rownames(seu.singlets@meta.data)[
    is.element(seu.singlets@meta.data$gRNA,
               as.vector(freq_gRNA$Var1[freq_gRNA$Freq >= 10]))
]

In [None]:
# Restrict the object to the cells selected in `keep_cells`
seu.singlets <- subset(seu.singlets, cells = keep_cells)

In [None]:
# Normalize data ("LogNormalize" method with a scale factor of 10000)
seu.singlets <- NormalizeData(seu.singlets, normalization.method = "LogNormalize", scale.factor = 10000)

In [None]:
# Scale data (all other ScaleData() arguments are left at their defaults)
seu.singlets <- ScaleData(object = seu.singlets)

In [None]:
# Store the filtered, normalized and scaled Seurat object in the RData folder
saveRDS(seu.singlets, sfile(RData, "raw_seuratObject_scaled.RDS"))