# Violin and dot plots to inspect the expression of genes of interest

In [0]:
.libPaths(c("/dbfs/home/jtrincado@almirall.com/my_r_packages/Seurat", .libPaths()))
library(dittoSeq)
library(dplyr)
library(ggplot2)
library(openxlsx)

.libPaths(c("/dbfs/home/jtrincado@almirall.com/my_r_packages/Seurat_v2", .libPaths()))
library(Seurat)

In [0]:
# dotplot_generator: Builds a dot plot of gene expression per cell type and
# condition from a Seurat object.
#
# Arguments:
#   - seurat_obj: A Seurat object containing single-cell RNA-seq data. Its
#     meta.data must contain the columns "celltype_AR" (cell type
#     classification) and "Condition_AR" (condition/disease/stimulus).
#   - genes_vector: A character vector of gene names to include in the plot.
#     Duplicated names are collapsed; genes that are not present in the "RNA"
#     assay are skipped with a warning.
#
# Returns:
#   A ggplot object with genes on the x axis and cell types on the y axis.
#   Dot size encodes the fraction of cells with expression > 0, fill encodes
#   the mean expression, and the dot border colour (dodged side by side)
#   encodes the condition.
#
# Errors:
#   Stops if a required meta.data column is missing or if none of the
#   requested genes is present in the "RNA" assay.
#
# Notes:
#   - Expression values come from GetAssayData() with its default layer/slot
#     (presumably the normalized "data" layer -- confirm for your SeuratObject
#     version).
#   - ggplot2 functions are namespace-qualified so that calling this function
#     does not attach packages as a side effect.
dotplot_generator <- function(seurat_obj, genes_vector) {
  required_columns <- c("celltype_AR", "Condition_AR")
  missing_columns <- setdiff(required_columns, colnames(seurat_obj@meta.data))
  if (length(missing_columns) > 0) {
    stop(
      "seurat_obj@meta.data is missing required column(s): ",
      paste(missing_columns, collapse = ", "),
      call. = FALSE
    )
  }

  # Extract cell attributes (cell types and conditions), one row per barcode
  cell_attributes <- tibble::rownames_to_column(
    seurat_obj@meta.data[, required_columns, drop = FALSE],
    "barcode"
  )

  # Name the assay explicitly: the second positional argument of
  # GetAssayData() has not meant the same thing across SeuratObject versions.
  expression_matrix <- SeuratObject::GetAssayData(seurat_obj, assay = "RNA")

  genes_vector <- unique(as.character(genes_vector))
  missing_genes <- setdiff(genes_vector, rownames(expression_matrix))
  if (length(missing_genes) > 0) {
    warning(
      "Gene(s) not found in the RNA assay and skipped: ",
      paste(missing_genes, collapse = ", "),
      call. = FALSE
    )
  }
  genes_to_plot <- intersect(genes_vector, rownames(expression_matrix))
  if (length(genes_to_plot) == 0) {
    stop("None of the requested genes is present in the RNA assay.",
         call. = FALSE)
  }

  # Long table with one row per (barcode, gene).
  # drop = FALSE keeps the matrix shape when a single gene is requested;
  # all_of() selects columns from an external character vector explicitly.
  counts_table <- expression_matrix[genes_to_plot, , drop = FALSE] |>
    as.matrix() |>
    t() |>
    as.data.frame() |>
    tibble::rownames_to_column("barcode") |>
    tidyr::pivot_longer(
      cols = tidyr::all_of(genes_to_plot),
      names_to = "gene",
      values_to = "counts"
    )

  # Merge the two tables by barcode
  counts_with_attributes <- dplyr::inner_join(
    cell_attributes,
    counts_table,
    by = dplyr::join_by(barcode)
  )

  # Fraction of cells expressing each gene and average expression, computed
  # per cell type, condition and gene
  to_plot <- dplyr::summarize(
    counts_with_attributes,
    frac_expressed = sum(counts > 0) / dplyr::n(),
    avg_expression = mean(counts),
    .by = c(celltype_AR, Condition_AR, gene)
  )

  # Drop groups in which no cell expresses the gene: drawing a dot for zero
  # expression is confusing
  to_plot <- to_plot[to_plot$frac_expressed > 0, ]

  # Generate the dot plot
  dot_plot <- ggplot2::ggplot(to_plot) +
    ggplot2::geom_point(
      ggplot2::aes(
        x = gene,
        y = celltype_AR,
        size = frac_expressed,
        fill = avg_expression,
        color = Condition_AR,
        group = Condition_AR
      ),
      position = ggplot2::position_dodge(width = 0.5),
      shape = 21,
      stroke = 1  # Wider dot border so the condition colour is easy to see
    ) +
    ggplot2::labs(
      x = "Gene",
      y = "Cell type",
      color = "Condition",
      fill = "Average expression",
      size = "Fraction expressed"
    ) +
    ggplot2::scale_fill_distiller(
      type = "seq",
      palette = "Blues",
      direction = 1
    ) +
    ggplot2::theme_bw()

  dot_plot
}

## On single-cell data

In [0]:
# Load the merged single-cell object (CellTypist-annotated, per the file name)
AR <- readRDS(file = "/dbfs/mnt/sandbox/TFM_PAULA/MERGED_ARdatasets_celltypist_TFM.rds")

In [0]:
# Keep only lesional and healthy-control ("HC") cells, then restrict to the
# cell types shown in the dot plots
AR_f <- subset(AR, Condition_AR %in% c("Lesional", "HC"))
AR_f <- subset(
  AR_f,
  celltype_AR %in% c(
    "Undifferentiated_KC", "Differentiated_KC", "Tc", "Th", "Treg", "NK",
    "Fibroblasts", "Melanocytes"
  )
)

In [0]:
# Gene sets to plot, grouped by the cell type they are discussed for
th_genes_commented <- c("IL13", "IL21", "CCL17")
tc_genes_commented <- c("IL26", "APOBEC3B")
kc_genes_commented <- c(
  "SPRR1A", "SPRR1B", "LCE3E", "KRT17", "ALOX12B", "ALOXE3", "IL1A",
  "HLA-DQB1"
)

In [0]:
# Th and Tc gene sets combined for a joint plot
genes_tc_th <- c(th_genes_commented, tc_genes_commented)

In [0]:
# Plot size for the next figure(s); FALSE is spelled out because F can be
# reassigned
options(repr.plot.width = 900, repr.plot.height = 900, echo = FALSE)

In [0]:
dotplot_generator(AR_f, genes_tc_th)

In [0]:
options(repr.plot.width = 800, repr.plot.height = 700, echo = FALSE)

In [0]:
dotplot_generator(AR_f, th_genes_commented)

In [0]:
dotplot_generator(AR_f, tc_genes_commented)

In [0]:
options(repr.plot.width = 1100, repr.plot.height = 900, echo = FALSE)

In [0]:
dotplot_generator(AR_f, kc_genes_commented)

## On pseudobulk data (violin plots)

In [0]:
# Load the aggregated-expression (pseudobulk) object, per the file name
pseudo_AR <- readRDS("/dbfs/mnt/sandbox/TFM_PAULA/AR_MERGED_celltypist_aggregated_expression_TFM.rds")

In [0]:
# Keep only lesional and healthy-control ("HC") samples
pseudo_AR_f <- subset(pseudo_AR, Condition_AR %in% c("Lesional", "HC"))

In [0]:
# Differentiated keratinocytes only
pseudo_AR_filtered <- subset(pseudo_AR_f, celltype_AR == "Differentiated_KC")

In [0]:
# Plot size for the next figure; FALSE is spelled out because F can be
# reassigned
options(repr.plot.width = 1000, repr.plot.height = 1100, echo = FALSE)

In [0]:
dittoPlot(
  pseudo_AR_filtered, kc_genes_commented,
  group.by = "Condition_AR",
  plots = c("vlnplot", "jitter"),
  main = "Differentiated Keratinocytes"
)

In [0]:
# Th cells only
pseudo_AR_filtered_th <- subset(pseudo_AR_f, celltype_AR == "Th")

In [0]:
options(repr.plot.width = 1000, repr.plot.height = 450, echo = FALSE)

In [0]:
dittoPlot(
  pseudo_AR_filtered_th, th_genes_commented,
  group.by = "Condition_AR",
  plots = c("vlnplot", "jitter"),
  main = "Th cells"
)

In [0]:
# Tc cells only
pseudo_AR_filtered_tc <- subset(pseudo_AR_f, celltype_AR == "Tc")

In [0]:
# The original call had a stray empty argument (", ,") before `main`; it is
# removed so that no formal is matched to an empty argument by accident.
dittoPlot(
  pseudo_AR_filtered_tc, tc_genes_commented,
  group.by = "Condition_AR",
  plots = c("vlnplot", "jitter"),
  main = "Tc cells"
)

# Before CellTypist

In [0]:
# Load the merged single-cell object without the CellTypist annotation (per
# the file name). NOTE: this overwrites the `AR` / `AR_f` objects created in
# the earlier cells.
AR <- readRDS(file = "/dbfs/mnt/sandbox/TFM_PAULA/MERGED_ARdatasets_TFM.rds")

In [0]:
# Keep only lesional and healthy-control ("HC") cells, then restrict to the
# cell types shown in the dot plot
AR_f <- subset(AR, Condition_AR %in% c("Lesional", "HC"))
AR_f <- subset(
  AR_f,
  celltype_AR %in% c("KC", "TC", "Treg", "NK", "Fibroblasts", "Melanocytes")
)

In [0]:
# `genes` feeds the dot plot; `genes_kc` is the subset shown in the KC violins
genes <- c("CTSC", "CTSS", "LYZ", "PSMA6")
genes_kc <- c("CTSC", "CTSS", "LYZ")

In [0]:
# Plot size for the next figure; FALSE is spelled out because F can be
# reassigned
options(repr.plot.width = 800, repr.plot.height = 800, echo = FALSE)

In [0]:
dotplot_generator(AR_f, genes)

In [0]:
# Load the matching aggregated-expression (pseudobulk) object, per the file
# name. NOTE: this overwrites the `pseudo_AR*` objects created earlier.
pseudo_AR <- readRDS("/dbfs/mnt/sandbox/TFM_PAULA/AR_MERGED_aggregated_expression_TFM.rds")

In [0]:
pseudo_AR_f <- subset(pseudo_AR, Condition_AR %in% c("Lesional", "HC"))

In [0]:
# Samples labelled "KC"
pseudo_AR_filtered <- subset(pseudo_AR_f, celltype_AR == "KC")

In [0]:
# Samples labelled "TC"
pseudo_AR_filtered_tc <- subset(pseudo_AR_f, celltype_AR == "TC")

In [0]:
options(repr.plot.width = 1000, repr.plot.height = 450, echo = FALSE)

In [0]:
dittoPlot(
  pseudo_AR_filtered, genes_kc,
  group.by = "Condition_AR",
  plots = c("vlnplot", "jitter"),
  main = "Expression in KC"
)

In [0]:
options(repr.plot.width = 500, repr.plot.height = 500, echo = FALSE)

In [0]:
dittoPlot(
  pseudo_AR_filtered_tc, "PSMA6",
  group.by = "Condition_AR",
  plots = c("vlnplot", "jitter"),
  main = "PSMA6 in T cells"
)