# Main analysis file using Seurat and Monocle

### Loading libraries and packages for analysis

In [None]:
library(Seurat)
library(SeuratData)
library(SeuratWrappers)
library(sctransform)
library(monocle3)
library(SingleCellExperiment)
library(zellkonverter) # for readH5AD()
library(ggplot2)
library(ComplexHeatmap)
library(zoo)
library(dplyr)
library(magrittr)
library(viridis)
library(patchwork)
library(ggpubr)
library(png)
library(loomR)

### Code

In [None]:
# Read the AnnData (.h5ad) file and turn it into a Seurat object,
# using the "X" assay as counts and leaving the data slot unset
h5ad_path <- "~/Documents/R/GSE161621_filtered_data/alphamns.h5ad"
alpha_data <- readH5AD(file = h5ad_path)
alpha <- as.Seurat(alpha_data, counts = "X", data = NULL)

# Standard preprocessing chain: log-normalise -> variable features ->
# scale all genes -> PCA on the variable features -> UMAP on PCs 1-10
# Alternative: use Embeddings(alpha, reduction = "X_pca")
alpha <- alpha %>%
  NormalizeData(
    normalization.method = "LogNormalize",
    scale.factor = 10000,
    verbose = FALSE
  ) %>%
  FindVariableFeatures(
    selection.method = "vst",
    nfeatures = 2000,
    verbose = FALSE
  ) %>%
  ScaleData(features = rownames(.), verbose = FALSE) %>%
  RunPCA(
    features = VariableFeatures(object = .),
    seed.use = 42,
    verbose = FALSE
  ) %>%
  RunUMAP(dims = 1:10, seed.use = 42, verbose = FALSE)

# Identifying FF, FR, SF cells
# Cells are labelled from marker expression in the "data" slot:
#   "sf"      : Prkcd > 0 and Kcnq5 == 0
#   "ff"      : Kcnq5 > 0 and Prkcd == 0
#   "Unknown" : every cell matching neither rule (the starting label)
# Only "sf" and "ff" are assigned here; no "fr" label is created.
Idents(object = alpha) <- "Unknown"
Idents(object = alpha, WhichCells(object = alpha, expression = Prkcd>0 & Kcnq5==0, slot = 'data')) <- "sf"
Idents(object = alpha, WhichCells(object = alpha, expression = Kcnq5>0 & Prkcd==0, slot = 'data')) <- "ff"

# Fix the level order and keep a copy of the labels in the metadata
Idents(alpha) <- factor(Idents(alpha), levels = c('sf','ff', 'Unknown'))
alpha$celltype <- Idents(alpha)

# Cells left unlabelled. The subset is taken from `alpha` (the object labelled
# above); the previous `alpha_new` was never defined in this script.
unknown <- subset(alpha, idents = "Unknown")
# colnames() on a Seurat object returns its cell names, so there is no need
# to reach into the assay's internal slots.
unknown_names <- colnames(unknown)

# PCA embeddings and min-max scaling
# First principal-component coordinate of every cell
pc1_coord <- Embeddings(alpha, reduction = "pca")[, 1]

# PC1 shifted so that its smallest value sits at zero
pc1_coord_horizshift <- pc1_coord - unname(sort(pc1_coord))[1]

# Min-max rescale PC1 onto [0, 1] and store it as the pseudotime axis
pc1_range <- range(pc1_coord)
pc1_coord_scaled <- (pc1_coord - pc1_range[1]) / (pc1_range[2] - pc1_range[1])
alpha$pseudotime <- pc1_coord_scaled

# Same values in ascending order
pseudotime_ordered <- sort(pc1_coord_scaled, na.last = TRUE)

# Monocle
# Convert the Seurat object into a monocle3 cell_data_set and compute
# size factors on it.
alphaM <- as.cell_data_set(alpha)
alphaM <- estimate_size_factors(alphaM)

# Record gene names in the row metadata under `gene_short_name`.
# rowData()<- is the public accessor for the row metadata, used here instead
# of writing through the nested S4 slots
# (rowRanges@elementMetadata@listData).
rowData(alphaM)$gene_short_name <- rownames(alpha[["RNA"]])

# Store the PC1-derived pseudotime in the UMAP entry of principal_graph_aux.
# NOTE(review): this writes directly into an internal slot; presumably this is
# where monocle3's pseudotime() accessor reads from - confirm against the
# installed monocle3 version.
alphaM@principal_graph_aux@listData$UMAP$pseudotime <- pc1_coord_scaled

# DE along pc1-pseudotime
# Fit models with formula ~pseudotime (quasipoisson family)
pseudotime_genes <- fit_models(
  alphaM,
  model_formula_str = "~pseudotime",
  expression_family = "quasipoisson",
  cores = 1,
  reduction_method = NULL,
  verbose = TRUE
)
pseudotime_coefs <- coefficient_table(pseudotime_genes)
evaluated_fits <- evaluate_fits(pseudotime_genes)

# Keep the pseudotime term only, q < 0.05 and expressed in more than 50 cells,
# then rank by q-value (optional extra filter: abs(estimate) > 0.45)
pseudotime_filtered <- pseudotime_coefs %>%
  filter(
    term == "pseudotime",
    q_value < 0.05,
    num_cells_expressed > 50
  ) %>%
  select(gene_short_name, term, q_value, num_cells_expressed, estimate) %>%
  arrange(q_value)

# Top-ranked gene sets of increasing size
pseudotime_t250 <- pseudotime_filtered %>% head(250)
pseudotime_t500 <- pseudotime_filtered %>% head(500)
pseudotime_t1000 <- pseudotime_filtered %>% head(1000)

# Upregulated and downregulated across pseudotime - based on estimate coeff
pseudotime_up <- pseudotime_t1000 %>% filter(estimate > 0)
pseudotime_down <- pseudotime_t1000 %>% filter(estimate < 0)

# Export expression matrix to loom
# NOTE(review): `alphaM1000` was used here without ever being defined in this
# script. It is reconstructed as `alphaM` restricted to the top-1000
# pseudotime genes (all cells kept) - confirm this matches the original intent.
# Assumes rownames(alphaM) match the `gene_short_name` values.
alphaM1000 <- alphaM[as.character(pseudotime_t1000$gene_short_name), ]

# Dense genes-x-cells expression matrix of the subset
exprMat <- as.matrix(exprs(alphaM1000))

# Write the matrix to a loom file (create() is called with
# do.transpose = TRUE), then close the file handle
exprMatLoom <- create("pyscenic_5/exprMat.loom", exprMat, do.transpose = TRUE)
exprMatLoom$close_all()

# Export mtx metadata as csv
# One row per cell: cell name, assigned identity and scaled PC1 pseudotime.
# Identities are read from `alpha` (the previously referenced `alpha_new` is
# not defined in this script). tibble() replaces the deprecated data_frame();
# a tibble has no cell-name row names, so the CSV keeps the same columns.
metadata_df <- dplyr::tibble(
  cellname = names(pc1_coord_scaled),
  celltype = Idents(alpha),
  pseudotime = unname(pc1_coord_scaled)
)
write.csv(metadata_df, "~/Documents/R/gradientWeighing/pyscenic_5/metadata_df.csv")

# Names of the cells that stayed in the "Unknown" identity
write.csv(unknown_names, "~/Documents/R/gradientWeighing/pyscenic_5/unknown_names.csv")

# Top 1000 genes, upregulated, downregulated into txt
gene_names <- noquote(pseudotime_t1000$gene_short_name)
up_names <- noquote(pseudotime_up$gene_short_name)
down_names <- noquote(pseudotime_down$gene_short_name)

# Output file -> gene list; each file gets one gene name per line,
# unquoted, with no header or row names
gene_lists <- list(
  "monoclet1000.txt" = gene_names,
  "pseudotime_t1000_up.txt" = up_names,
  "pseudotime_t1000_down.txt" = down_names
)
for (out_file in names(gene_lists)) {
  write.table(
    gene_lists[[out_file]],
    out_file,
    sep = '\t',
    quote = FALSE,
    row.names = FALSE,
    col.names = FALSE
  )
}

