In [1]:
suppressMessages(library(ArchR))
suppressMessages(library(Seurat))
suppressMessages(library(Signac))
suppressMessages(library(harmony))
suppressMessages(library(dplyr))
suppressMessages(library(cowplot))
suppressMessages(library(harmony))
suppressMessages(library(Nebulosa))
suppressMessages(library(ggpubr))
suppressMessages(library(Ipaper))

In [2]:
## Fix the RNG seed for this session, then show the working directory that
## the relative "../data" paths used below are resolved against.
set.seed(seed = 42)
getwd()

In [None]:
## Load the co-embedded object from disk and display its summary.
coembed <- readRDS(file = "../data/coembed/coembed.Rds")
coembed

In [None]:
## Density plots of a panel of genes on the "umap_harmony" embedding.
options(repr.plot.width = 20, repr.plot.height = 10)

## combine = FALSE so the individual panels can be laid out with
## patchwork::wrap_plots() below.
ps1 <- plot_density(
  coembed,
  features = c(
    "DTHD1", "IL7R", "CDC14A", "LEF1",
    "SERINC5", "GNLY", "TXK", "KCNQ5"
  ),
  reduction = "umap_harmony",
  combine = FALSE
)

## Arrange the panels on two rows.
patchwork::wrap_plots(ps1, nrow = 2)

In [None]:
## Number of RNA cells per sample (orig.ident) and cluster (RNA_snn_res.0.9).
## The result has one row per sample and one column per cluster; combinations
## without any cell are filled with 0. The table is written to CSV.
df_count <- coembed@meta.data %>%
  as.data.frame() %>%
  dplyr::filter(tech == "RNA") %>%
  group_by(orig.ident, RNA_snn_res.0.9) %>%
  ## drop the grouping explicitly so df_count is a plain, ungrouped table
  ## (and summarise() does not emit its ".groups" message)
  summarise(count = n(), .groups = "drop") %>%
  tidyr::pivot_wider(
    names_from = RNA_snn_res.0.9,
    values_from = count,
    values_fill = 0
  )

write.csv(
  df_count,
  "../data/coembed/stat_cell_counts_per_sample_rna.csv",
  row.names = FALSE
)

In [None]:
## Keep only the cells assigned to clusters 1, 2, 3, 4 and 8
## (column RNA_snn_res.0.9).
coembed.sub <- subset(
  coembed,
  subset = RNA_snn_res.0.9 %in% c(1, 2, 3, 4, 8)
)

In [None]:
## Discrete palette built from the cluster labels of the subset.
cols.clusters <- ArchR::paletteDiscrete(
  coembed.sub@meta.data[["RNA_snn_res.0.9"]]
)

options(repr.plot.width = 6, repr.plot.height = 6)

## Subset shown on the original "umap_harmony" embedding, coloured and
## labelled by cluster.
p <- DimPlot(
  coembed.sub,
  reduction = "umap_harmony",
  group.by = "RNA_snn_res.0.9",
  cols = cols.clusters,
  label = TRUE,
  shuffle = TRUE
) +
  labs(x = "UMAP1", y = "UMAP2")

p

In [None]:
## Recompute a UMAP for the subset from dimensions 1-30 of the "harmony"
## reduction and store it as a separate reduction, "umap_harmony_v2".
coembed.sub <- RunUMAP(
  coembed.sub,
  dims = 1:30,
  reduction = "harmony",
  reduction.name = "umap_harmony_v2",
  ## Argument name written out in full (was `reduction.ke`, which only
  ## resolved through R's partial argument matching).
  ## NOTE(review): Seurat may normalise keys that contain extra underscores;
  ## confirm the key actually stored on the reduction.
  reduction.key = "umap_harmony_v2_",
  min.dist = 0.4,
  verbose = FALSE
)

In [None]:
## Same cluster plot as before, now on the recomputed "umap_harmony_v2"
## embedding.
p <- DimPlot(
  coembed.sub,
  reduction = "umap_harmony_v2",
  group.by = "RNA_snn_res.0.9",
  cols = cols.clusters,
  label = TRUE,
  shuffle = TRUE
) +
  labs(x = "UMAP1", y = "UMAP2")

p

In [None]:
## Annotate clusters: map each retained cluster id to a cell-type label
## (clusters 2 and 3 share the label "CD4").
new.cluster.ids <- c(
  "1" = "CD8",
  "2" = "CD4",
  "3" = "CD4",
  "4" = "NK",
  "8" = "NK_T"
)

## Use the cluster column as the active identity, rename the identities with
## the mapping above, and keep the result in the `annotation` metadata column.
Idents(coembed.sub) <- "RNA_snn_res.0.9"
coembed.sub <- RenameIdents(coembed.sub, new.cluster.ids)
coembed.sub$annotation <- Idents(coembed.sub)

In [None]:
## Discrete palette built from the annotation labels.
cols.celltype <- ArchR::paletteDiscrete(
  coembed.sub@meta.data[["annotation"]]
)

options(repr.plot.width = 6, repr.plot.height = 6)

## Recomputed UMAP coloured by annotation; colours are applied through a
## manual colour scale.
p <- DimPlot(
  coembed.sub,
  reduction = "umap_harmony_v2",
  group.by = "annotation",
  label = FALSE,
  shuffle = TRUE
) +
  scale_color_manual(values = cols.celltype) +
  labs(x = "UMAP1", y = "UMAP2")

p

In [None]:
## Annotation UMAP with one panel per value of the `tech` column.
options(repr.plot.height = 5, repr.plot.width = 10)

p <- DimPlot(
  coembed.sub,
  reduction = "umap_harmony_v2",
  group.by = "annotation",
  split.by = "tech",
  cols = cols.celltype,
  label = TRUE,
  shuffle = TRUE
) +
  labs(x = "UMAP1", y = "UMAP2")

p

In [None]:
## Per-cell metadata of the annotated subset as a plain data.frame; the
## composition summaries below are computed from it.
meta.data <- coembed.sub@meta.data %>%
  as.data.frame()

In [None]:
## Cell-type composition per patient: for every patient, the fraction of its
## cells carrying each annotation, drawn as a stacked bar chart.
df_plot <- meta.data %>%
  group_by(patient, annotation) %>%
  ## keep the `patient` grouping so sum(counts) below is the per-patient total
  summarise(counts = n(), .groups = "drop_last") %>%
  mutate(cell_proportion = counts / sum(counts)) %>%
  ungroup()

p1 <- ggplot(df_plot, aes(patient, cell_proportion, fill = annotation)) +
  geom_col(position = position_stack(reverse = TRUE)) +
  scale_fill_manual(values = cols.celltype) +
  theme_cowplot() +
  xlab("") + ylab("") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

options(repr.plot.width = 10, repr.plot.height = 6)

p1

In [None]:
## Cell-type composition per region: for every region, the fraction of its
## cells carrying each annotation, drawn as a stacked bar chart.
df_plot <- meta.data %>%
  group_by(region, annotation) %>%
  ## keep the `region` grouping so sum(counts) below is the per-region total
  summarise(counts = n(), .groups = "drop_last") %>%
  mutate(cell_proportion = counts / sum(counts)) %>%
  ungroup()

p1 <- ggplot(df_plot, aes(region, cell_proportion, fill = annotation)) +
  geom_col(position = position_stack(reverse = TRUE)) +
  scale_fill_manual(values = cols.celltype) +
  theme_cowplot() +
  xlab("") + ylab("") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

options(repr.plot.width = 10, repr.plot.height = 6)

p1

In [None]:
## Cell-type composition per patient group: for every group, the fraction of
## its cells carrying each annotation, drawn as a stacked bar chart.
df_plot <- meta.data %>%
  group_by(patient_group, annotation) %>%
  ## keep the `patient_group` grouping so sum(counts) below is the per-group
  ## total
  summarise(counts = n(), .groups = "drop_last") %>%
  mutate(cell_proportion = counts / sum(counts)) %>%
  ungroup()

p1 <- ggplot(df_plot, aes(patient_group, cell_proportion, fill = annotation)) +
  geom_col(position = position_stack(reverse = TRUE)) +
  scale_fill_manual(values = cols.celltype) +
  theme_cowplot() +
  xlab("") + ylab("") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

options(repr.plot.width = 6, repr.plot.height = 6)

p1

In [None]:
## Per-sample cell-type proportions compared between patient groups.
## Proportions are computed within each patient_region_id, then every sample
## is labelled with its patient_group and the groups are compared pairwise
## (Wilcoxon test) in one facet per annotation.
df_plot <- meta.data %>%
  group_by(patient_region_id, annotation) %>%
  ## keep the `patient_region_id` grouping so sum(counts) below is the
  ## per-sample total
  summarise(counts = n(), .groups = "drop_last") %>%
  mutate(proportion = counts / sum(counts)) %>%
  ungroup()

## NOTE(review): a sample with no cell of a given annotation has no row in
## df_plot, so it is left out of the boxplot and test for that annotation
## instead of contributing a proportion of 0. Confirm this is intended.

## One row per (patient_region_id, patient_group) pair, taken from the full
## object's metadata.
df_anno <- coembed@meta.data %>%
  as.data.frame() %>%
  subset(., select = c("patient_region_id", "patient_group")) %>%
  unique()

## Join on the sample id explicitly rather than on whatever columns the two
## tables happen to share.
df_plot <- merge(as.data.frame(df_plot), df_anno, by = "patient_region_id")

p <- ggplot(data = df_plot, aes(x = patient_group, y = proportion)) +
  geom_boxplot2(aes(color = patient_group)) +
  facet_wrap(~annotation, nrow = 1) +
  stat_compare_means(
    comparisons = list(
      c("group_1", "group_2"),
      c("group_2", "group_3"),
      c("group_1", "group_3")
    ),
    method = "wilcox.test"
  ) +
  theme_cowplot() +
  xlab("") + ylab("") +
  theme(
    axis.text.x = element_blank(),
    legend.title = element_blank()
  )

options(repr.plot.width = 15, repr.plot.height = 5)

print(p)

In [None]:
## Re-assign the active identities from their character labels.
## NOTE(review): presumably done so the identity factor is rebuilt from the
## plain labels before marker detection; confirm the intent.
Idents(coembed.sub) <- as.character(Idents(coembed.sub))

In [None]:
## Positive marker genes for every active identity of the subset.
all.markers <- FindAllMarkers(
  coembed.sub,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)

## Top 10 genes per cluster ranked by avg_log2FC (slice_max keeps ties by
## default, so a cluster can contribute more than 10 rows).
df <- all.markers %>%
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 10)

options(repr.plot.width = 15, repr.plot.height = 5)

## Dot plot of the de-duplicated top genes with rotated axis labels.
p <- DotPlot(coembed.sub, features = unique(df[["gene"]])) +
  RotatedAxis()

print(p)

In [None]:
## Create the output folder for the marker tables if it is not there yet.
if (!dir.exists(paths = "../data/coembed/MarkerGenesAfterAnnotation")) {
  dir.create(path = "../data/coembed/MarkerGenesAfterAnnotation")
}

In [None]:
## Export the marker genes: one Excel sheet per cluster, each sorted by
## decreasing avg_log2FC, plus the full unsplit table as an RDS file.
## lapply() replaces the former `1:length(markerList)` index loop, which
## would fail on an empty list; list names (the cluster labels) are kept.
markerList <- lapply(
  split(all.markers, all.markers$cluster),
  function(markers) markers[order(-markers$avg_log2FC), , drop = FALSE]
)

## The paths are constant strings, so they are passed directly.
WriteXLS::WriteXLS(
  markerList,
  ExcelFileName = "../data/coembed/MarkerGenesAfterAnnotation/res.0.9.xlsx",
  SheetNames = names(markerList)
)

saveRDS(all.markers, "../data/coembed/MarkerGenesAfterAnnotation/res.0.9.rds")

In [None]:
## Mapping to spatial data: read the Visium sample annotation and build a
## lookup vector of patient_region_id values (with "/" replaced by "_"),
## named by sample_id.
df_anno <- read.csv(
  "../../../visiumSpatial/metadata/sample_annotation.csv",
  header = TRUE
)

patient_region_id <- setNames(
  stringr::str_replace_all(df_anno$patient_region_id, "/", "_"),
  df_anno$sample_id
)

In [None]:
## Create the output folder for the spatial mapping, including any missing
## parent directories, if it is not there yet.
if (!dir.exists("../data/visium/cell_states_mapping_with_annotation")) {
  dir.create(
    "../data/visium/cell_states_mapping_with_annotation",
    recursive = TRUE
  )
}

In [None]:
## Preview the first rows of the marker table.
head(all.markers, n = 6L)

In [None]:
## Save the annotated subset (with the recomputed UMAP and the `annotation`
## column) to disk.
saveRDS(
  object = coembed.sub,
  file = "../data/coembed/coembed.annotation.Rds"
)