#Find DEGs with pseudobulk (limma)
##AD/PN – Alkon et al, 2023
- Does not include non-lesional samples.
- 5 AD, 7 PN, 1 AP (atopic prurigo) and 4 healthy controls

This notebook finds the DEGs of each relevant cell type in the Alkon et al., 2023 dataset, following the Seurat vignette on DE analysis after pseudobulking: https://satijalab.org/seurat/articles/de_vignette#perform-de-analysis-after-pseudobulking

###Most relevant cell types: 
T-cells (TC), Fibroblasts, Keratinocytes (KC),  Monocytes, Macrophages, Dendritic cells, Natural killers, Treg and MastC

###Contrast: 
  - Lesional vs Healthy control (LvsHC)

In [0]:
.libPaths(c("/dbfs/home/jtrincado@almirall.com/my_r_packages/Seurat", .libPaths()))
library(dplyr)
library(openxlsx)

In [0]:
# Personal package library: create it when missing and put it first on the
# library search path.
my_library <- "/dbfs/home/pdelgadom@almirall.com/my_r_packages/tfm_paula_4"
dir.create(my_library, showWarnings = FALSE, recursive = TRUE)
.libPaths(c(my_library, .libPaths()))

# 'remotes' provides the installer used by install_from_github().
if (!requireNamespace("remotes")) {
  install.packages("remotes")
}
 
# Install a package into `my_library` through a temporary staging library.
#
# NOTE: despite its name, this helper currently installs with
# remotes::install_bioc(); the CRAN and GitHub variants are kept commented out.
#
# Args:
#   pkg_name:   Package name, passed as-is to remotes::install_bioc().
#   my_library: Destination library directory. When NULL, the first entry of
#               .libPaths() is used and a message reports the destination.
#
# Returns: NULL (invisibly); called for its side effect of copying the
#   installed package directories into `my_library`.
install_from_github <- function(pkg_name, my_library = NULL) {
  if (is.null(my_library)) {
    my_library <- .libPaths()[1]
    message("Installing ", pkg_name, " to ", my_library)
  }

  temp_library <- tempfile()
  dir.create(temp_library)
  # Always remove the staging directory, even when the installation fails,
  # so repeated calls do not leave copies behind in the session temp dir.
  on.exit(unlink(temp_library, recursive = TRUE), add = TRUE)

  #remotes::install_cran(pkg_name, lib = temp_library, upgrade=FALSE)
  remotes::install_bioc(pkg_name, lib = temp_library, upgrade = FALSE)
  #remotes::install_github(pkg_name, lib = temp_library, upgrade=FALSE)

  # Copy everything that was installed into the staging library (one
  # directory per package) to the destination library.
  for (x in list.files(temp_library)) {
    file.copy(
      file.path(temp_library, x),
      my_library,
      recursive = TRUE
    )
  }
  invisible(NULL)
}

In [0]:
# Install limma only when it cannot be loaded from the current library paths.
if (!requireNamespace("limma")) {
  install_from_github("limma")
}

In [0]:
# Install edgeR only when it cannot be loaded from the current library paths.
if (!requireNamespace("edgeR")) {
  install_from_github("edgeR")
}

In [0]:
.libPaths(c("/dbfs/home/pdelgadom@almirall.com/my_r_packages/tfm_paula_4", .libPaths()))
library(limma)
library(edgeR)

In [0]:
.libPaths(c("/dbfs/home/boriol@almirall.com/my_r_packages/bulkRNASeq_PBMCs_R4.3", .libPaths()))
library(EnhancedVolcano)
library(VennDiagram)

In [0]:
#Load required libraries
.libPaths(c("/dbfs/home/jtrincado@almirall.com/my_r_packages/Seurat_v2/", .libPaths()))
library(Seurat)

In [0]:
# Draw a volcano plot (logFC vs adjusted p-value) for one DE results table.
#
# Args:
#   resultsDE: Table coercible to a data.frame with the columns `gene`,
#              `logFC` and `adj.P.Val`. `gene` values are used as row names,
#              so they must be unique.
#
# Returns: the object returned by EnhancedVolcano(). At most 10 up-regulated
#   (logFC > 1) and 10 down-regulated (logFC < -1) genes with
#   adj.P.Val < 0.05 are labelled, chosen by smallest adjusted p-value.
volcano_generator <- function(resultsDE) {
  resultsDE0 <- as.data.frame(resultsDE)

  required_cols <- c("gene", "logFC", "adj.P.Val")
  missing_cols <- setdiff(required_cols, colnames(resultsDE0))
  if (length(missing_cols) > 0) {
    stop(
      "resultsDE is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Create annotations for the volcano plot
  rownames(resultsDE0) <- resultsDE0$gene

  # slice_min(with_ties = FALSE) caps the selection at exactly 10 rows.
  # top_n() keeps every tied row, and adjusted p-values are frequently tied,
  # which could flood the plot with labels.
  top10_genes <- resultsDE0 %>%
    filter(logFC > 1 & adj.P.Val < 0.05) %>%
    slice_min(adj.P.Val, n = 10, with_ties = FALSE)

  bottom10_genes <- resultsDE0 %>%
    filter(logFC < -1 & adj.P.Val < 0.05) %>%
    slice_min(adj.P.Val, n = 10, with_ties = FALSE)

  # Plot volcano
  volcano <- EnhancedVolcano(
    resultsDE0,
    lab = rownames(resultsDE0),
    x = 'logFC',
    y = 'adj.P.Val',
    pCutoff = 0.05,
    selectLab = c(top10_genes$gene, bottom10_genes$gene),
    labSize = 5,
    drawConnectors = TRUE,
    widthConnectors = 0.5,
    colConnectors = 'black'
  )
  volcano
}

##Read data

In [0]:
# Read the processed Seurat object for the Alkon et al. dataset from the
# sandbox mount.
alkon <- readRDS(file="/dbfs/mnt/sandbox/TFM_PAULA/ALKON_PROCESSED_TFM.rds")

In [0]:
# alkon$Condition <- ifelse(alkon$Condition == "AD", "Lesional", alkon$Condition)

##Filtering variables that have at least 3 counts

In [0]:
# Count matrix of the RNA assay, taken before pseudobulking. Rows are features
# (genes); columns are presumably single cells -- TODO confirm.
counts_matrix <- alkon[["RNA"]]$counts
# Print the matrix dimensions as a sanity check.
dim(counts_matrix)

In [0]:
# Keep only features (rows) with a count >= 3 in at least `smallestGroupSize`
# columns. NOTE(review): this runs before pseudobulking, so the columns are
# presumably cells rather than samples -- confirm this is the intended filter.
smallestGroupSize <- 3
keep <- rowSums(counts_matrix >= 3) >= smallestGroupSize
counts_keep <- counts_matrix[keep,]

# Subset the Seurat object to keep only the features in counts_keep
alkon_f <- subset(alkon, features = rownames(counts_keep))

# Explicitly store the filtered counts in the new Seurat object
alkon_f[["RNA"]]$counts <- counts_keep

# Check dimensions of the filtered count matrix
dim(alkon_f[["RNA"]]$counts)

In [0]:
head(alkon_f[["RNA"]]$counts)

##Pseudobulk the counts based on the donor id

In [0]:
# Pseudobulk the counts: one aggregated profile per
# Condition x Sample_id x h_celltype_v4 combination, returned as a Seurat
# object.
pseudo_alkon <- AggregateExpression(
  alkon_f,
  assays = "RNA",
  return.seurat = TRUE, # spelled out: `T` is an ordinary, reassignable variable
  group.by = c("Condition", "Sample_id", "h_celltype_v4")
)

# Each 'cell' is now a condition-donor-celltype pseudobulk profile
tail(Cells(pseudo_alkon))

In [0]:
pseudo_alkon

In [0]:
# Combined "<celltype>_<condition>" label for every pseudobulk profile.
pseudo_alkon$celltype.cond <- paste(pseudo_alkon$h_celltype_v4, pseudo_alkon$Condition, sep = "_")

In [0]:
# Use the combined celltype/condition label as the active identity.
Idents(pseudo_alkon) <- "celltype.cond"

##PCA

In [0]:

# Preprocessing of the pseudobulk object before PCA. Each step overwrites
# `pseudo_alkon`, so the order of the calls matters.

# Step 1: Normalize the data
pseudo_alkon <- NormalizeData(pseudo_alkon)

# Step 2: Find variable features
pseudo_alkon <- FindVariableFeatures(pseudo_alkon)

# Step 3: Scale the data
pseudo_alkon <- ScaleData(pseudo_alkon)

# Step 4: Run PCA on the variable features selected in step 2
pseudo_alkon <- RunPCA(pseudo_alkon, features = VariableFeatures(object = pseudo_alkon))

In [0]:
# PCA of the pseudobulk profiles, grouped and labelled by cell type
plot1 <- DimPlot(pseudo_alkon, reduction = "pca", group.by = "h_celltype_v4", label = TRUE, repel = TRUE)

# Same PCA, grouped and labelled by condition
plot2 <- DimPlot(pseudo_alkon, reduction = "pca", group.by = "Condition", label = TRUE, repel = TRUE)

# Plot size options for the notebook output
options(repr.plot.width=1500, repr.plot.height=1200)

# Show both plots together
plot1 + plot2

#Limma for pseudobulk

In [0]:
# Pseudobulk count matrix; its column names are the pseudobulk profile names.
expr_matrix <- pseudo_alkon$RNA$counts
# One metadata row per pseudobulk profile, keyed by the column name.
metadata <- data.frame(sample_id= colnames(expr_matrix))

In [0]:
# Split the pseudobulk names into their underscore-separated components:
# <condition>_<sample>_<celltype>. The split is done once and reused, and
# vapply() guarantees a character vector with one value per profile.
name_parts <- strsplit(as.character(metadata$sample_id), "_")
metadata$condition <- vapply(name_parts, `[`, character(1), 1)
metadata$sample <- vapply(name_parts, `[`, character(1), 2)
metadata$celltype <- vapply(name_parts, `[`, character(1), 3)
rownames(metadata) <- metadata$sample_id

# Sanity check: metadata rows must line up with the matrix columns
all(rownames(metadata) == colnames(expr_matrix))  # should be TRUE
 

In [0]:
# Pseudobulk profile names (<condition>_<sample>_<celltype>)
samples <- colnames(expr_matrix)

# Third underscore-separated component is the cell type; vapply() keeps the
# result a character vector whatever the input looks like
celltypes <- vapply(strsplit(samples, "_"), function(x) x[3], character(1))

# Distinct cell types present in the pseudobulk matrix
unique_celltypes <- unique(celltypes)

In [0]:
# Coerce to a base R matrix (the assay counts are presumably stored sparse --
# TODO confirm) for the per-cell-type subsetting done afterwards.
expr_matrix <- as.matrix(expr_matrix)

In [0]:
# Per-cell-type differential expression (AD vs HC) with edgeR filtering and
# limma-voom on the pseudobulk counts.
#
# Result: `alkon_limma_res`, one data frame with the topTable() output of all
# tested cell types stacked, plus the columns `gene` and `celltype`.
wanted_celltypes <- c("NK", "Macro", "TC", "KC", "Fibroblasts", "Treg")
# Conditions entering the contrast; the first one (HC) is the reference level,
# so the tested coefficient is "conditionAD".
contrast_levels <- c("HC", "AD")
# Pseudobulk profile names: <condition>_<sample>_<celltype>
samples <- colnames(expr_matrix)
alkon_limma_res <- list()
for (ct in wanted_celltypes) {
  # Profiles whose name ends with "_<cell type>"
  cols_ct <- samples[endsWith(samples, paste0("_", ct))]
  if (length(cols_ct) < 2) {
    cat("Skipping cell type:", ct, "- not enough samples\n")
    next
  }
  # Extract metadata from column names
  split_list <- strsplit(cols_ct, "_")
  sample_info_ct <- do.call(rbind, split_list)
  colnames(sample_info_ct) <- c("condition", "sample", "celltype")
  sample_info_ct <- as.data.frame(sample_info_ct, stringsAsFactors = TRUE)
  rownames(sample_info_ct) <- cols_ct

  # Keep only the profiles of the two contrasted conditions. The dataset
  # description also lists PN and AP samples; left in, they would become NA
  # in the factor and model.matrix() would drop their rows, so the design
  # would no longer match the count matrix.
  in_contrast <- as.character(sample_info_ct$condition) %in% contrast_levels
  sample_info_ct <- sample_info_ct[in_contrast, , drop = FALSE]
  cols_ct <- cols_ct[in_contrast]
  sample_info_ct$condition <- factor(
    as.character(sample_info_ct$condition),
    levels = contrast_levels
  )

  # Both conditions must be present, and there must be more samples than
  # model coefficients, otherwise no residual degrees of freedom are left.
  n_per_condition <- table(sample_info_ct$condition)
  if (any(n_per_condition == 0) || length(cols_ct) <= length(contrast_levels)) {
    cat("Skipping cell type:", ct, "- not enough HC/AD samples\n")
    next
  }

  # Subset expression matrix
  expr_ct <- expr_matrix[, cols_ct, drop = FALSE]
  expr_ct <- as.matrix(expr_ct)
  mode(expr_ct) <- "numeric"
  # Create DGEList and filter lowly expressed genes
  dge_ct <- DGEList(counts = expr_ct)
  keep <- filterByExpr(dge_ct, group = sample_info_ct$condition)
  dge_ct <- dge_ct[keep, , keep.lib.sizes = FALSE]
  dge_ct <- calcNormFactors(dge_ct)
  # Design matrix: intercept (HC) + AD effect
  design_ct <- model.matrix(~ condition, data = sample_info_ct)
  # voom and limma
  v_ct <- voom(dge_ct, design_ct, plot = FALSE)
  fit_ct <- lmFit(v_ct, design_ct)
  fit_ct <- eBayes(fit_ct)
  # Extract DE results
  res_ct <- topTable(fit_ct, coef = "conditionAD", number = Inf)
  # Keep the gene identifier as a real column: the same gene is tested in
  # several cell types, and duplicated row names cannot survive bind_rows()
  # unchanged.
  res_ct$gene <- rownames(res_ct)
  res_ct$celltype <- ct
  # Store results
  alkon_limma_res[[ct]] <- res_ct
  cat("Completed DE for cell type:", ct, "\n")
}
# Combine all cell type DE results
alkon_limma_res <- bind_rows(alkon_limma_res)
head(alkon_limma_res)

##Tcells

In [0]:
TC_results <- alkon_limma_res %>% filter(celltype=="TC")

In [0]:
# Gene identifiers: keep an existing `gene` column; otherwise recover them
# from the row names, dropping the "...<n>" suffix that bind_rows() may add
# to row names duplicated across cell types.
if (!"gene" %in% colnames(TC_results)) {
  TC_results$gene <- sub("\\.{3}[0-9]+$", "", rownames(TC_results))
}

In [0]:
TC_results$logFC

In [0]:
class(TC_results$adj.P.Val)

In [0]:
sum(is.na(TC_results$logFC))

In [0]:
volcano_generator(TC_results)

##Keratinocytes

In [0]:
KC_results <-alkon_limma_res %>% filter(celltype=="KC")

In [0]:
# Gene identifiers: keep an existing `gene` column; otherwise recover them
# from the row names, dropping the "...<n>" suffix that bind_rows() may add
# to row names duplicated across cell types.
if (!"gene" %in% colnames(KC_results)) {
  KC_results$gene <- sub("\\.{3}[0-9]+$", "", rownames(KC_results))
}

In [0]:
KC_results$logFC

In [0]:
class(KC_results$adj.P.Val)

In [0]:
sum(is.na(KC_results$logFC))

In [0]:
volcano_generator(KC_results)

In [0]:
# Show the keratinocyte results table. FB_results is only created in the
# Fibroblast section further down, so displaying it here fails when the
# notebook is run top to bottom.
display(KC_results)

##Fibroblast

In [0]:
FB_results <- alkon_limma_res %>% filter(celltype=="Fibroblasts")

In [0]:
# Gene identifiers: keep an existing `gene` column; otherwise recover them
# from the row names, dropping the "...<n>" suffix that bind_rows() may add
# to row names duplicated across cell types.
if (!"gene" %in% colnames(FB_results)) {
  FB_results$gene <- sub("\\.{3}[0-9]+$", "", rownames(FB_results))
}

In [0]:
volcano_generator(FB_results)

##Treg

In [0]:
Treg_results <- alkon_limma_res %>% filter(celltype=="Treg")

In [0]:
# Gene identifiers: keep an existing `gene` column; otherwise recover them
# from the row names, dropping the "...<n>" suffix that bind_rows() may add
# to row names duplicated across cell types.
if (!"gene" %in% colnames(Treg_results)) {
  Treg_results$gene <- sub("\\.{3}[0-9]+$", "", rownames(Treg_results))
}

In [0]:
volcano_generator(Treg_results)

##Macro

In [0]:
Macro_results <- alkon_limma_res %>% filter(celltype=="Macro")

In [0]:
# Gene identifiers: keep an existing `gene` column; otherwise recover them
# from the row names, dropping the "...<n>" suffix that bind_rows() may add
# to row names duplicated across cell types.
if (!"gene" %in% colnames(Macro_results)) {
  Macro_results$gene <- sub("\\.{3}[0-9]+$", "", rownames(Macro_results))
}

In [0]:
volcano_generator(Macro_results)

#Save

In [0]:
# Delete existing Excel files
# file.remove("/dbfs/mnt/sandbox/TFM_PAULA/Alkon/pseudobulk/alkon_limma_results_tcell.xlsx")
# file.remove("/dbfs/mnt/sandbox/TFM_PAULA/Alkon/pseudobulk/alkon_limma_results_kc.xlsx")
# file.remove("/dbfs/mnt/sandbox/TFM_PAULA/Alkon/pseudobulk/alkon_limma_results_FB.xlsx")
# file.remove("/dbfs/mnt/sandbox/TFM_PAULA/Alkon/pseudobulk/alkon_limma_results_macro.xlsx")
# file.remove("/dbfs/mnt/sandbox/TFM_PAULA/Alkon/pseudobulk/alkon_limma_results_treg.xlsx")

# Write each per-cell-type results table to its own Excel file.
out_dir <- "/dbfs/mnt/sandbox/TFM_PAULA/Alkon/pseudobulk"
# Make sure the output directory exists so the writes cannot fail on a
# missing path; this is a no-op when it is already there.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

# File name (without extension) -> results table
results_to_save <- list(
  alkon_limma_results_tcell = TC_results,
  alkon_limma_results_kc = KC_results,
  alkon_limma_results_FB = FB_results,
  alkon_limma_results_macro = Macro_results,
  alkon_limma_results_treg = Treg_results
)
for (nm in names(results_to_save)) {
  write.xlsx(results_to_save[[nm]], file.path(out_dir, paste0(nm, ".xlsx")))
}

In [0]:
display(TC_results)

In [0]:
display(KC_results)