# Reannotate the cell types in the Alkon dataset according to the CellTypist predictions

In [0]:
.libPaths(c("/dbfs/home/jtrincado@almirall.com/my_r_packages/Seurat_v2", .libPaths()))
library(dplyr)
library(Seurat)
library(patchwork)
library(ggplot2)
library(dittoSeq)
library(dplyr)

In [0]:
# Load the CellTypist prediction table (CSV) into a data.frame.
celltype_predictions <- read.csv(
  file = "/dbfs/mnt/sandbox/TFM_PAULA/Celltypist_ALKON_predictions_Adult_Human_Skin.csv"
)

In [0]:
# Load the processed Alkon object from disk; it is used below as the input to
# AddMetaData().
alkon_orig <- readRDS(file = "/dbfs/mnt/sandbox/TFM_PAULA/ALKON_PROCESSED_TFM.rds")

In [0]:
# List the columns of the prediction table (used to choose the column
# positions selected in the next cell).
names(celltype_predictions)

In [0]:
# Keep columns 16 to 19 of the prediction table.
# NOTE(review): presumably these are the CellTypist output columns
# (e.g. predicted_labels / majority_voting / conf_score) -- positions are
# hard-coded, so confirm against the column listing printed above.
celltype_predictions_f <- celltype_predictions[, 16:19]

# Use the `X` column as row names so AddMetaData() can align rows to cells.
# NOTE(review): assumes `X` holds the cell barcodes of `alkon_orig` -- confirm.
rownames(celltype_predictions_f) <- celltype_predictions$X

# Attach the selected prediction columns to a copy of the original object and
# preview the first rows of the resulting metadata.
alkon <- AddMetaData(object = alkon_orig, metadata = celltype_predictions_f)
head(alkon@meta.data, n = 5)

In [0]:
# Summary of the confidence score (mean, median, min, max and standard
# deviation) for each `majority_voting` label.
stats <- summarise(
  group_by(celltype_predictions, majority_voting),
  mean_conf_score = mean(conf_score),
  median_conf_score = median(conf_score),
  min_conf_score = min(conf_score),
  max_conf_score = max(conf_score),
  sd_conf_score = sd(conf_score)
)
stats

We have to take into account that some cell types found in Alkon are not present in Reynolds, so the package forces those cells into other cell types. I will therefore use the reannotation only for KC and T cells.

In [0]:
# Set the notebook plot size used by the following single-panel plots.
options(
  repr.plot.width = 1200,
  repr.plot.height = 1200
)

In [0]:
# UMAP coloured by the `majority_voting` metadata column, labelled, axes hidden.
DimPlot(
  object = alkon,
  reduction = "umap",
  group.by = "majority_voting",
  label = TRUE
) + NoAxes()

In [0]:
# UMAP coloured by the `predicted_labels` metadata column, labelled, axes hidden.
DimPlot(
  object = alkon,
  reduction = "umap",
  group.by = "predicted_labels",
  label = TRUE
) + NoAxes()

In [0]:
# Widen the notebook plot area for the side-by-side plots that follow.
options(
  repr.plot.width = 1900,
  repr.plot.height = 1200
)

In [0]:
# Side-by-side UMAPs: `h_celltype_v4` (left) vs. `majority_voting` (right).
plot1 <- DimPlot(
  object = alkon,
  reduction = "umap",
  group.by = "h_celltype_v4",
  label = TRUE
) + NoAxes()

plot2 <- DimPlot(
  object = alkon,
  reduction = "umap",
  group.by = "majority_voting",
  label = TRUE
) + NoAxes()

# patchwork: `+` combines the two plots into one figure.
plot1 + plot2

In [0]:
# LE: lymphatic endothelial
# VE: vascular endothelial
# F2: fibroblasts

In [0]:
# Count how many cells carry each `majority_voting` label.
table(alkon$majority_voting)


In [0]:
# Derive a coarser `big_cell_type` label from `majority_voting` by merging
# selected sub-labels into one group each; every other label is kept as is.
merged_labels <- as.character(alkon$majority_voting)

# The patterns are unanchored regular expressions, so any label that
# *contains* the pattern is replaced.
merged_labels[grepl("F1|F2", merged_labels)] <- "Fibroblasts"
merged_labels[grepl("Pericyte_2", merged_labels)] <- "Pericyte"
merged_labels[grepl("Macro_2", merged_labels)] <- "Macro"

alkon$big_cell_type <- merged_labels

In [0]:
# Stacked bar chart showing, for each `h_celltype_v4` group, how its cells are
# distributed across the `big_cell_type` labels.
ggplot(alkon@meta.data, aes(x = h_celltype_v4, fill = big_cell_type)) +
  # position = "fill" rescales every bar to a total height of 1, so the y axis
  # shows a proportion rather than a raw cell count.
  geom_bar(position = "fill") +
  labs(
    title = "Predicted Labels by Cluster",
    x = "h_celltype_v4",
    y = "Proportion",
    fill = "Predicted Labels"
  ) +
  theme_minimal() +
  # Rotate the x tick labels so long cell-type names do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_fill_manual(values = dittoColors())

In [0]:
# List the distinct `big_cell_type` labels present after merging.
unique(alkon$big_cell_type)

In [0]:
# List the distinct `h_celltype_v4` labels for comparison.
unique(alkon$h_celltype_v4)

In [0]:
# Side-by-side UMAPs: `majority_voting` (left) vs. `seurat_clusters` (right).
plot1 <- DimPlot(
  object = alkon,
  reduction = "umap",
  group.by = "majority_voting",
  label = TRUE
) + NoAxes()

plot2 <- DimPlot(
  object = alkon,
  reduction = "umap",
  group.by = "seurat_clusters",
  label = TRUE
) + NoAxes()

# patchwork: `+` combines the two plots into one figure.
plot1 + plot2

In [0]:
# Side-by-side UMAPs: `h_celltype_v4` (left) vs. `seurat_clusters` (right).
plot1 <- DimPlot(
  object = alkon,
  reduction = "umap",
  group.by = "h_celltype_v4",
  label = TRUE
) + NoAxes()

plot2 <- DimPlot(
  object = alkon,
  reduction = "umap",
  group.by = "seurat_clusters",
  label = TRUE
) + NoAxes()

# patchwork: `+` combines the two plots into one figure.
plot1 + plot2

In [0]:
# Build the `celltypist` annotation: cells in the listed Seurat clusters get a
# fixed label; every other cell keeps its `big_cell_type` label.
# NOTE(review): "Plasmocytoid  DC" (two spaces) and "Melanocyte" differ from
# the "Plasmacytoid DC" / "Melanocytes" strings matched against
# `h_celltype_v4` later in this notebook -- confirm the spelling is intended.
cluster_groups <- list(
  "Fibroblasts" = c(3, 5, 11, 20),
  "Endothelial blood" = c(9, 2, 30, 8),
  "Prolif" = c(6, 27, 25),
  "Smooth Muscle" = c(12, 14, 29),
  "Sweat Gland" = c(26),
  "Melanocyte" = c(19),
  "Plasmocytoid  DC" = c(21),
  "LE" = c(15)
)

# Flatten to a lookup vector: names are cluster ids (as text), values are labels.
cluster_to_label <- setNames(
  rep(names(cluster_groups), lengths(cluster_groups)),
  unlist(cluster_groups, use.names = FALSE)
)

# Per-cell override label; NA where the cell's cluster is not listed above.
cluster_override <- unname(cluster_to_label[as.character(alkon$seurat_clusters)])

alkon$celltypist <- ifelse(
  is.na(cluster_override),
  alkon$big_cell_type,
  cluster_override
)

In [0]:
# Side-by-side UMAPs: `h_celltype_v4` (left) vs. the new `celltypist` column (right).
plot1 <- DimPlot(
  object = alkon,
  reduction = "umap",
  group.by = "h_celltype_v4",
  label = TRUE
) + NoAxes()

plot2 <- DimPlot(
  object = alkon,
  reduction = "umap",
  group.by = "celltypist",
  label = TRUE
) + NoAxes()

# patchwork: `+` combines the two plots into one figure.
plot1 + plot2

In [0]:
# List all metadata columns now stored on the object.
colnames(alkon@meta.data)

In [0]:
# Persist the annotated object.
# NOTE(review): this runs before `smoothened_cluster_corrected` is added in a
# later cell, so the saved file will not contain that column -- confirm intended.
saveRDS(object = alkon, file = "/dbfs/mnt/sandbox/TFM_PAULA/ALKON_CELLTYPIST_TFM.rds")

In [0]:
# Build `smoothened_cluster_corrected`: cells whose `h_celltype_v4` label is in
# the list below keep that label; all other cells take their `big_cell_type`.
#
# Both branches are wrapped in as.character() because ifelse() drops factor
# attributes: if either column were a factor, the result would contain the
# integer level codes instead of the label text. For character columns the
# conversion changes nothing.
alkon$smoothened_cluster_corrected <- ifelse(
  alkon$h_celltype_v4 %in% c(
    "Smooth Muscle", "Sweat Gland", "Prolif",
    "Plasmacytoid DC", "LE", "Melanocytes"
  ),
  as.character(alkon$h_celltype_v4),
  as.character(alkon$big_cell_type)
)

In [0]:
# Side-by-side UMAPs: `h_celltype_v4` (left) vs. `smoothened_cluster_corrected` (right).
plot1 <- DimPlot(
  object = alkon,
  reduction = "umap",
  group.by = "h_celltype_v4",
  label = TRUE
) + NoAxes()

plot2 <- DimPlot(
  object = alkon,
  reduction = "umap",
  group.by = "smoothened_cluster_corrected",
  label = TRUE
) + NoAxes()

# patchwork: `+` combines the two plots into one figure.
plot1 + plot2

In [0]:
# saveRDS(alkon, "/dbfs/mnt/sandbox/TFM_PAULA/ALKON_CELLTYPIST_TFM.rds")