# This script is designed to pipeline Seurat object output into Monocle3 # This script also contains all the necessary functionalities in Monocle3 as # This could also convert a 2D Seurat object and visualize/analyze it in 3D # It will start from reading in a Seurat object with the count sparse matrix, UMAP coordinates, and cluster information # This script is originally written for local machines but adaptations have also been included in annotations ### Require:: 'BiocManager', 'BiocGenerics', 'DelayedArray', 'DelayedMatrixStats', 'limma', 'S4Vectors', 'SingleCellExperiment', 'SummarizedExperiment', 'reticulate', 'htmlwidgets' # This is a required python package #reticulate::py_install("louvain") # This is installing the actual monocle3 #devtools::install_github('cole-trapnell-lab/monocle3') ### Installing the packages library(Seurat) library(monocle3) library(htmlwidgets) ### Specifying input parameters # args = commandArgs(trailingOnly = T) # input.dir <- args[1] # output.dir.input <- args[2] # upn <- args[3] # Dim <- args[4] # Celltypefile <- args[5] # nPC <- args[6] # cluster.res <- args[7] # output.dir <- sprintf("%s/%s.%s.Seurat.to.Monocle3.%s", output.dir.input, upn, date, Dim) date = gsub("2019-","19",Sys.Date(),perl=TRUE); date = gsub("-","",date) Dim = "2D" upn = "TWEJ-ND050819" input.dir <- getwd() nPC <- 15 cluster.res <- 0.7 output.dir <- sprintf("%s/%s.%s.Seurat.to.Monocle3.%s", getwd(), upn, date, Dim) dir.create(file.path(output.dir), showWarnings = FALSE) ### Reading in Seurat object print("Readingin Seurat objects") seurat <- readRDS(file = input.dir) ### Re-dimension reduction for 3D rendering if (Dim = "3D") { print ("Running UMAP 3D") seurat <- RunUMAP(object = seurat, reduction = "pca", dims = 1:nPC, n.components = 3) print("Clustering 3D") seurat <- FindNeighbors(object=seurat, dims=1:nPC) seurat <- FindClusters(object=seurat, resolution=cluster.res) seurat[[sprintf("ClusterNames_%.1f_%dPC", cluster.res, nPC)]] <- Idents(object = seurat) } ### Building the necessary parts for a basic cds # part one, gene annotations gene_annotation <- as.data.frame(rownames(seurat@reductions[["pca"]]@feature.loadings), row.names = rownames(seurat@reductions[["pca"]]@feature.loadings)) colnames(gene_annotation) <- "gene_short_name" # part two, cell information cell_metadata <- as.data.frame(seurat@assays[["RNA"]]@counts@Dimnames[[2]], row.names = seurat@assays[["RNA"]]@counts@Dimnames[[2]]) colnames(cell_metadata) <- "barcode" # part three, counts sparse matrix New_matrix <- seurat@assays[["RNA"]]@counts New_matrix <- New_matrix[rownames(seurat@reductions[["pca"]]@feature.loadings), ] expression_matrix <- New_matrix ### Construct the basic cds object cds_from_seurat <- new_cell_data_set(expression_matrix, cell_metadata = cell_metadata, gene_metadata = gene_annotation) ### Construct and assign the made up partition recreate.partition <- c(rep(1, length(cds_from_seurat@colData@rownames))) names(recreate.partition) <- cds_from_seurat@colData@rownames recreate.partition <- as.factor(recreate.partition) cds_from_seurat@clusters@listData[["UMAP"]][["partitions"]] <- recreate.partition ### Assign the cluster info list_cluster <- seurat@meta.data[[sprintf("ClusterNames_%s_%sPC", cluster.res, nPC)]] names(list_cluster) <- seurat@assays[["RNA"]]@data@Dimnames[[2]] cds_from_seurat@clusters@listData[["UMAP"]][["clusters"]] <- list_cluster ### Could be a space-holder, but essentially fills out louvain parameters cds_from_seurat@clusters@listData[["UMAP"]][["louvain_res"]] <- "NA" ### Assign UMAP coordinate cds_from_seurat@reducedDims@listData[["UMAP"]] <-seurat@reductions[["umap"]]@cell.embeddings ### Assign feature loading for downstream module analysis cds_from_seurat@preprocess_aux$gene_loadings <- seurat@reductions[["pca"]]@feature.loadings ### Learn graph, this step usually takes a significant period of time for larger samples print("Learning graph, which can take a while depends on the sample") cds_from_seurat <- learn_graph(cds_from_seurat, use_partition = T) ### Reading in cell type information print("Reading in cell type information") Celltypefile <- "celltype.predictions.July2019.xls" lineage.table <- read.table(file = Celltypefile, sep = "", header = T) lineage.table$cell.barcodes.i. <- paste(lineage.table$cell.barcodes.i., "-1", sep = "") lineage.table <- lineage.table[, 1:2] lineage.table$lineage1 <- gsub("_\\d+","", lineage.table$lineage1) lineage.table$lineage1 <- gsub("TCELLA","T-CELL", lineage.table$lineage1) lineage.table$lineage1 <- gsub("BCELLA","B-CELL", lineage.table$lineage1) lineage.table$lineage1 <- gsub("NKA","NK", lineage.table$lineage1) lineage.table$lineage1 <- gsub("DENDRA","DENDRITIC", lineage.table$lineage1) lineage.table$lineage1 <- gsub("DENDA","DENDRITIC", lineage.table$lineage1) lineage.table$lineage1 <- gsub("[0-9]+", "", lineage.table$lineage1) lineage.table$cell.barcodes.i. <- gsub("-1", "", lineage.table$cell.barcodes.i.) indices <- match(pData(cds_from_seurat)$barcode, lineage.table$cell.barcodes.i.) lineage.table.refined <- lineage.table[indices,] lineage.table.final <- lineage.table.refined[, 2] colData(cds_from_seurat)$celltype <- lineage.table.final ### Plot cluster info with trajectory print("Plotting clusters") if (Dim == "2D") { pdf(sprintf("%s/clusters.with.trajectory.%s.pdf", output.dir, Dim), width = 10, height = 10) clus <- plot_cells(cds_from_seurat, color_cells_by = 'cluster', label_groups_by_cluster=FALSE, label_leaves=FALSE, label_branch_points=FALSE) print(clus) dev.off() } if (Dim == "3D") { cluster <- plot_cells_3d(cds_from_seurat, color_cells_by = 'cluster', label_groups_by_cluster=FALSE, label_leaves=FALSE, label_branch_points=FALSE) saveWidget(cluster, file = sprintf("%s/clusters.with.trajectory.%s.html", output.dir, Dim)) } ### Plot cell type info with trajectory print("Plotting cell type info") if (Dim == "2D") { pdf(sprintf("%s/celltype.with.trajectory.%s.pdf", output.dir, Dim), width = 10, height = 10) ctype <- plot_cells(cds_from_seurat, color_cells_by = 'celltype', label_groups_by_cluster=FALSE, label_leaves=FALSE, label_branch_points=FALSE) print(ctype) dev.off() } if (Dim == "3D") { celltype <- plot_cells_3d(cds_from_seurat, color_cells_by = 'celltype', label_groups_by_cluster=FALSE, label_leaves=FALSE, label_branch_points=FALSE) saveWidget(celltype, file = sprintf("%s/celltype.with.trajectory.%s.html", output.dir, Dim)) } ### You can also plot out different subclone information, which is not shown here # Before we order the cells in pseudotime, we need to identify which is the pricipal node/cells # We can either use their default GUI (which unfortunately is not compatible with 3D), or in the case of NBM, select the HSCs # For the sake of completeness, we will feed in the cells they are labeled as HSCs, but in other samples for example HSCs, it could be a different story # plus I am not sure if the GUI will show up on the cluster # This is using the GUI # cds_from_seurat <- order_cells(cds_from_seurat) root_cell_list <- grep("HSC", colData(cds_from_seurat)$celltype) root_cell_list <- counts(cds_from_seurat)@Dimnames[[2]][root_cell_list] # Sometimes, in this case, the root cells are dispered so single cell has to be identified to input into the order cells function cds_from_seurat <- order_cells(cds_from_seurat, root_cells = "CTTGATTCACAAAGTA") ### Now we plot pseudotime print("Plotting pseudotime") if (Dim == "2D") { pdf(sprintf("%s/pseudotime.with.trajectory.%s.pdf", output.dir, Dim), width = 10, height = 10) ptime <- plot_cells(cds_from_seurat, color_cells_by = 'pseudotime', label_groups_by_cluster=FALSE, label_leaves=FALSE, label_branch_points=FALSE) print(ptime) dev.off() } if (Dim == "3D") { pseudotime <- plot_cells_3d(cds_from_seurat, color_cells_by = 'pseudotime', label_groups_by_cluster=FALSE, label_leaves=FALSE, label_branch_points=FALSE) saveWidget(pseudotime, file = sprintf("%s/pseudotime.with.trajectory.%s.html", output.dir, Dim)) } ### Now we can subset the cells and do branch expression/pseudotime analysis ### This first part is to identify genes that are differentially expressed on the different paths through the trajectory # This function is disabled for now p <- 0 if (p > 0) { print("calculating most significant genes across the whole trajectory, EXRREMELY time-consuming") cds_from_seurat_pr_test_res = graph_test(cds_from_seurat, neighbor_graph="principal_graph", cores=4) pr_deg_ids = row.names(subset(cds_from_seurat_pr_test_res, q_value < 0.05)) gene_list_1 <- pr_deg_ids[1:4] ### Plotting individual genes on the map print("Plotting individual genes across the whole trajectory") if (Dim == "2D") { pdf(sprintf("%s/top4.highly.sig.genes.with.trajectory.%s.pdf", output.dir, Dim), width = 20, height = 20) hgenes <- plot_cells(cds_from_seurat, genes = gene_list_1, show_trajectory_graph=FALSE, label_cell_groups=FALSE, label_leaves=FALSE) print(hgenes) dev.off() } if (Dim == "3D") { indi_genes <- plot_cells_3d(cds_from_seurat, genes = gene_list_1, label_groups_by_cluster=FALSE, label_leaves=FALSE, label_branch_points=FALSE) saveWidget(indi_genes, file = sprintf("%s/top4.highly.sig.genes.with.trajectory.%s.html", output.dir, Dim)) } ### Now we can collect the trajectory-variable genes into modules for co-reg elements gene_module_df = monocle3:::find_gene_modules(cds_from_seurat[pr_deg_ids,], resolution=c(0,10^seq(-6,-1))) cell_group_df = tibble::tibble(cell=row.names(colData(cds_from_seurat)), cell_group=colData(cds_from_seurat)$cell.type) agg_mat = aggregate_gene_expression(cds_from_seurat, gene_module_df, cell_group_df) row.names(agg_mat) = stringr::str_c("Module ", row.names(agg_mat)) pheatmap::pheatmap(agg_mat, scale="column", clustering_method="ward.D2") ### Now we can plot these modules if (Dim == "2D") { pdf(sprintf("%s/sig.modules.with.trajectory.%s.pdf", output.dir, Dim), width = 20, height = 20) smodules <- plot_cells(cds_from_seurat, genes=gene_module_df %>% filter(module %in% c(29, 20, 11, 22)), label_cell_groups=FALSE, show_trajectory_graph=FALSE) print(smodules) dev.off() } if (Dim == "3D") { indi_modules <- plot_cells_3d(cds_from_seurat, genes=gene_module_df %>% filter(module %in% c(29, 20, 11, 22)), label_groups_by_cluster=FALSE, label_leaves=FALSE, label_branch_points=FALSE) saveWidget(indi_modules, file = sprintf("%s/sig.modules.with.trajectory.%s.html", output.dir, Dim)) } ### Or we can plot them against pseudotime # Say you want to study the expression of certain gene (MPO) in certain lineages (HSC - MEP - CEP - Monocyte) Mono_genes = c("MPO") Mono_lineage_cds_from_seurat = cds_from_seurat[rowData(cds_from_seurat)$gene_short_name %in% Mono_genes, colData(cds_from_seurat)$celltype %in% c("MONO", "MEP", "CEP", "HSC")] print("Plotting genes against pseudotime") pdf(sprintf("%s/Genes.against.pseudotime.%s.pdf", output.dir, Dim), width = 20, height = 20) plot_genes_in_pseudotime(Mono_lineage_cds_from_seurat, color_cells_by="celltype", min_expr=0.5) dev.off() } ### Now we can subset the cells and do branch expression/pseudotime analysis ### This second part is to identify genes that are differentially expressed only within this selected region but not necessarily with the trajectories # This function is disabled for now as well if (p > 1) { # Subsetting the cells # Yet this step required GUI, which I am not sure how it will pan out on the cluster cds_from_seurat_subset = choose_cells(cds_from_seurat) # Identify the significant genes print("Subset analysis") subset_pr_test_res = graph_test(cds_from_seurat_subset, cores=4) pr_deg_ids = row.names(subset(subset_pr_test_res, q_value < 0.05)) # Group them into modules # Here the resolution parameters and k might vary so feel free to adjust them as well # Sometimes when there are too few genes we might want to gene_module_df = monocle3:::find_gene_modules(cds_from_seurat_subset[pr_deg_ids,], resolution=1e-5, verbose = T, k = 10) # Plot the scores for the modules cell_group_df = tibble::tibble(cell=row.names(colData(cds_from_seurat_subset)), cell_group=clusters(cds_from_seurat_subset)[colnames(cds_from_seurat_subset)]) agg_mat = aggregate_gene_expression(cds_from_seurat_subset, gene_module_df, cell_group_df) module_dendro = hclust(dist(agg_mat)) gene_module_df$module <- factor(gene_module_df$module, levels = row.names(agg_mat)[module_dendro$order]) # Plot the modules along the trajectory if (Dim == "2D") { pdf(sprintf("%s/subset.sig.modules.with.trajectory.%s.pdf", output.dir, Dim), width = 20, height = 20) submodules <- plot_cells(cds_from_seurat, genes=gene_module_df, label_cell_groups=FALSE, show_trajectory_graph=FALSE) print(submodules) dev.off() } if (Dim == "3D") { subset_indi_modules <- plot_cells_3d(cds_from_seurat, genes=gene_module_df, label_groups_by_cluster=FALSE, label_leaves=FALSE, label_branch_points=FALSE) saveWidget(subset_indi_modules, file = sprintf("%s/subset.sig.modules.with.trajectory.%s.html", output.dir, Dim)) } } ### End of the script