In [None]:
suppressMessages(library(ArchR))
suppressMessages(library(Seurat))
suppressMessages(library(Signac))
suppressMessages(library(harmony))
suppressMessages(library(dplyr))
suppressMessages(library(cowplot))

In [2]:
## load the annotated, integrated snRNA-seq object from disk
obj.rna <- readRDS("../../../snRNA/from_rico/integrated_snrnaseq/integrated_rnasamples_ann.rds")

In [3]:
## print the object summary (features, cells, assays, reductions)
obj.rna

An object of class Seurat 
29126 features across 191795 samples within 1 assay 
Active assay: RNA (29126 features, 0 variable features)
 3 dimensional reductions calculated: pca, harmony, umap_harmony

In [4]:
## list the distinct labels stored in the cell_type metadata column
unique(obj.rna$cell_type)

In [5]:
## keep only the cells whose cell_type label is "Mast"
obj.rna <- subset(obj.rna, subset = cell_type == "Mast")

In [6]:
## print the object summary again to check how many cells remain
obj.rna

An object of class Seurat 
29126 features across 719 samples within 1 assay 
Active assay: RNA (29126 features, 0 variable features)
 3 dimensional reductions calculated: pca, harmony, umap_harmony

In [7]:
## preview the first rows of the per-cell metadata table
head(obj.rna@meta.data)

Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,percent.mt,doublet_score,doublet,dissociation_s1,opt_clust,patient,batch,opt_clust_integrated,cell_type
Unnamed: 0_level_1,<chr>,<dbl>,<int>,<dbl>,<dbl>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>
AAAGGATTCGTTCAGA-1_1_1_1_1_1_1_1_1_1_1_1_1_1_1,CK158,2404,1663,0.12479201,0.0341406241,singlet,0.13439442,7,P1,A,25,Mast
AAGTCGTTCTTACCGC-1_1_1_1_1_1_1_1_1_1_1_1_1_1_1,CK158,1110,844,0.09009009,0.0001993882,singlet,0.14536462,19,P1,A,25,Mast
ACCACAAGTTCCGCAG-1_1_1_1_1_1_1_1_1_1_1_1_1_1_1,CK158,1723,1205,0.23215322,0.0294415131,singlet,0.09167438,19,P1,A,25,Mast
ACCCTTGCAAATGATG-1_1_1_1_1_1_1_1_1_1_1_1_1_1_1,CK158,2149,1421,0.18613309,0.0003221695,singlet,0.0688999,19,P1,A,25,Mast
ACTATCTCATCCTGTC-1_1_1_1_1_1_1_1_1_1_1_1_1_1_1,CK158,1338,994,0.07473842,0.0001510231,singlet,0.12239567,19,P1,A,25,Mast
ACTGTGAAGAGGTGCT-1_1_1_1_1_1_1_1_1_1_1_1_1_1_1,CK158,1788,1322,0.16778523,0.0007243955,singlet,0.09527012,7,P1,A,25,Mast


In [8]:
## read the per-sample annotation table and preview its first rows
df_sample_annotation <- read.csv("../../../snRNA/metadata/sample_annotation.csv", header = TRUE)
head(df_sample_annotation)

## one lookup vector per annotation column, each named by sample_id
sample_to_region <- setNames(df_sample_annotation$region_novel,
                             df_sample_annotation$sample_id)
sample_to_patient <- setNames(df_sample_annotation$patient,
                              df_sample_annotation$sample_id)
sample_to_patient_region <- setNames(df_sample_annotation$patient_region_id,
                                     df_sample_annotation$sample_id)
sample_to_patient_group <- setNames(df_sample_annotation$patient_group,
                                    df_sample_annotation$sample_id)

Unnamed: 0_level_0,sample_id,region_novel,patient,patient_region_id,patient_group
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>
1,CK158,control,P1,control_P1,group_1
2,CK162,FZ/GT,P4,FZ/GT_P4,group_3
3,CK165,IZ/BZ,P2,IZ/BZ_P2,group_2
4,CK163,RZ/BZ,P2,RZ/BZ_P2,group_1
5,CK161,IZ,P3,IZ_P3,group_2
6,CK160,RZ/BZ,P3,RZ/BZ_P3,group_1


In [9]:
## Annotate every cell with the metadata of its sample (orig.ident) using an
## exact lookup in the sample_to_* vectors.
## stringr::str_replace_all() with a named vector treats every name as a
## regular expression and rewrites substrings one pattern after another, so
## overlapping sample IDs or regex metacharacters could silently produce wrong
## labels, and unannotated samples would keep their raw ID as the label.
cell_sample_ids <- as.character(obj.rna@meta.data$orig.ident)

## all sample_to_* vectors share the same names, so one check covers them all
unannotated_samples <- setdiff(unique(cell_sample_ids[!is.na(cell_sample_ids)]),
                               names(sample_to_region))
if (length(unannotated_samples) > 0) {
    stop("Samples missing from the sample annotation: ",
         paste(unannotated_samples, collapse = ", "),
         call. = FALSE)
}

## as.character() drops the names carried over from the lookup vectors
obj.rna@meta.data$region <- as.character(sample_to_region[cell_sample_ids])
## NOTE: this overwrites the `patient` column already present in the metadata
obj.rna@meta.data$patient <- as.character(sample_to_patient[cell_sample_ids])
obj.rna@meta.data$patient_region_id <- as.character(sample_to_patient_region[cell_sample_ids])
obj.rna@meta.data$patient_group <- as.character(sample_to_patient_group[cell_sample_ids])

In [10]:
## number of cells per sample, largest sample first
df_cell_count <- obj.rna@meta.data %>%
    as.data.frame() %>%
    group_by(orig.ident) %>%
    tally(name = "count") %>%
    arrange(desc(count))

df_cell_count

orig.ident,count
<chr>,<int>
CK368,139
CK365,58
CK358,56
CK357,49
CK370,49
CK367,48
CK364,37
CK158,34
CK164,30
CK362,30


In [11]:
# drop samples that contribute 10 cells or fewer, then keep only the cells
# that belong to the remaining samples
df_cell_count <- dplyr::filter(df_cell_count, count > 10)
obj.rna.sub <- subset(obj.rna, subset = orig.ident %in% df_cell_count$orig.ident)

obj.rna.sub

An object of class Seurat 
29126 features across 686 samples within 1 assay 
Active assay: RNA (29126 features, 0 variable features)
 3 dimensional reductions calculated: pca, harmony, umap_harmony

In [None]:
## standard preprocessing of the subset, one step at a time:
## normalise -> variable features -> scale -> PCA -> UMAP on the first 30 dims
obj.rna.sub <- NormalizeData(obj.rna.sub)
obj.rna.sub <- FindVariableFeatures(obj.rna.sub)
obj.rna.sub <- ScaleData(obj.rna.sub)
obj.rna.sub <- RunPCA(obj.rna.sub, verbose = FALSE)
obj.rna.sub <- RunUMAP(obj.rna.sub, dims = 1:30)

In [None]:
## wide, short canvas: four UMAP panels side by side
options(repr.plot.width = 20, repr.plot.height = 5)

## uncorrected UMAP coloured by sample, patient, region and patient group
p1 <- DimPlot(obj.rna.sub, group.by = "orig.ident", reduction = "umap")
p2 <- DimPlot(obj.rna.sub, group.by = "patient", reduction = "umap")
p3 <- DimPlot(obj.rna.sub, group.by = "region", reduction = "umap")
p4 <- DimPlot(obj.rna.sub, group.by = "patient_group", reduction = "umap", label = TRUE)

patchwork::wrap_plots(p1, p2, p3, p4, nrow = 1)

In [None]:
## Run Harmony on the PCA embedding, using sample, patient and region as the
## grouping variables, and plot the convergence of the objective.
obj.rna.sub <- RunHarmony(obj.rna.sub,
                          group.by.vars = c("orig.ident", "patient", "region"),
                          reduction = "pca",
                          max.iter.harmony = 30,
                          dims.use = 1:30,
                          project.dim = FALSE,
                          plot_convergence = TRUE)

## UMAP on the Harmony embedding, stored as its own reduction "umap_harmony".
## The argument is spelled out as `reduction.key`: the previous `reduction.ke`
## only worked through R's partial argument matching.
obj.rna.sub <- RunUMAP(obj.rna.sub,
                       dims = 1:30,
                       reduction = "harmony",
                       reduction.name = "umap_harmony",
                       reduction.key = "umapharmony_",
                       verbose = FALSE,
                       min.dist = 0.4)

In [None]:
## wide, short canvas: four UMAP panels side by side
options(repr.plot.width = 20, repr.plot.height = 5)

## Harmony-based UMAP coloured by sample, patient, region and patient group
p1 <- DimPlot(obj.rna.sub, group.by = "orig.ident", reduction = "umap_harmony")
p2 <- DimPlot(obj.rna.sub, group.by = "patient", reduction = "umap_harmony")
p3 <- DimPlot(obj.rna.sub, group.by = "region", reduction = "umap_harmony")
p4 <- DimPlot(obj.rna.sub, group.by = "patient_group", reduction = "umap_harmony", label = TRUE)

patchwork::wrap_plots(p1, p2, p3, p4, nrow = 1)

In [None]:
## neighbour graph on the first 30 Harmony dimensions, then clustering at
## resolution 0.1
obj.rna.sub <- obj.rna.sub %>%
    FindNeighbors(reduction = "harmony", dims = 1:30) %>%
    FindClusters(resolution = 0.1, verbose = FALSE)

In [None]:
## square canvas for a single panel
options(repr.plot.width = 5, repr.plot.height = 5)
## Harmony-based UMAP coloured and labelled by the resolution-0.1 clusters
DimPlot(obj.rna.sub, group.by = "RNA_snn_res.0.1", reduction = "umap_harmony", label = TRUE)

In [None]:
# suppressMessages(library(Nebulosa))

# options(repr.plot.height = 5, repr.plot.width = 10)

# p1 <- plot_density(obj.rna.sub, features="KIT", reduction="umap_harmony")
# p2 <- plot_density(obj.rna.sub, features="CPA3", reduction="umap_harmony")

# patchwork::wrap_plots(list(p1, p2), nrow = 1)

In [None]:
## Make sure the output directory exists before anything is written to it.
## recursive = TRUE also creates a missing "../data" parent; without it
## dir.create() only warns and returns FALSE when the parent is absent.
if (!dir.exists("../data/snRNA")) {
    dir.create("../data/snRNA", recursive = TRUE)
}

In [None]:
# use the resolution-0.1 clustering as the active identity for marker detection
Idents(obj.rna.sub) <- "RNA_snn_res.0.1"

# positive markers only, expressed in at least 25% of cells,
# with a log fold-change threshold of 0.25
all.markers <- FindAllMarkers(obj.rna.sub,
                              logfc.threshold = 0.25,
                              min.pct = 0.25,
                              only.pos = TRUE)

# top 10 markers per cluster ranked by average log2 fold change
df <- slice_max(group_by(all.markers, cluster), order_by = avg_log2FC, n = 10)

# Dot plot: dot size is the percentage of cells in a cluster expressing the
# feature, colour is the average expression level
options(repr.plot.width = 15, repr.plot.height = 4)
DotPlot(obj.rna.sub, features = unique(df$gene)) + RotatedAxis()

In [None]:
## One table of markers per cluster, each sorted by decreasing avg_log2FC.
## lapply() replaces the `1:length(markerList)` index loop, which would index
## out of bounds when the list is empty; the list names (cluster labels) are
## preserved and reused as sheet names.
markerList <- lapply(
    split(all.markers, all.markers$cluster),
    function(cluster_markers) {
        cluster_markers[order(-cluster_markers$avg_log2FC), ]
    }
)

## one Excel sheet per cluster
WriteXLS::WriteXLS(markerList,
                   ExcelFileName = "../data/snRNA/MarkerGenes.xlsx",
                   SheetNames = names(markerList))

## the full, unsplit marker table as RDS
saveRDS(all.markers, "../data/snRNA/MarkerGenes.rds")

In [None]:
## save the processed and clustered object as an RDS file
saveRDS(obj.rna.sub, "../data/snRNA/snRNA.rds")

In [None]:
## print R, platform and package version information for this session
sessionInfo()