#Comparison of the results with previous annotation

In [0]:
#Load required libraries

.libPaths(c("/dbfs/home/jtrincado@almirall.com/my_r_packages/Seurat", .libPaths()))
library(openxlsx)
library(dplyr)

.libPaths(c("/dbfs/home/boriol@almirall.com/my_r_packages/bulkRNASeq_PBMCs_R4.3", .libPaths()))
library(VennDiagram)
library(EnhancedVolcano)

.libPaths(c("/dbfs/home/jtrincado@almirall.com/my_r_packages/Seurat_v2", .libPaths()))
library(Seurat)


##KC subannotation

####Common DEGs between the different annotations

In [0]:
# Load the KC differential-expression tables: undifferentiated and
# differentiated KC from the merged_AR_celltypist_results folder, and the
# single KC table from the earlier merged_AR_results folder.
undif_kc <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/DEGs/bulk.undif.kc.de.cov.xlsx"
)
dif_kc <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/DEGs/bulk.dif.kc.de.cov.xlsx"
)
kc <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_results/adjusting_cov_dataset_DEGs/bulk.kc.de.cov.xlsx"
)

In [0]:
# Show the unfiltered differentiated-KC DEG table for visual inspection.
dif_kc

In [0]:
# Keep only rows with padj below 0.05 and an absolute log2 fold change
# above 1. dplyr::filter() also drops rows where either condition is NA.
undif_kc_filt <- filter(undif_kc, padj < 0.05, abs(log2FoldChange) > 1)
dif_kc_filt <- filter(dif_kc, padj < 0.05, abs(log2FoldChange) > 1)
kc_filt <- filter(kc, padj < 0.05, abs(log2FoldChange) > 1)

In [0]:
# Show the filtered DEG table of the previous (single KC) annotation.
kc_filt

In [0]:
# Gene names of the filtered DEGs per annotation; NA gene names are removed.
gene_sets <- list(
  Undiff_KC = na.omit(undif_kc_filt$gene),
  Diff_KC = na.omit(dif_kc_filt$gene),
  KC = na.omit(kc_filt$gene)
)

# Build the three-set Venn diagram as a grid object (filename = NULL means
# nothing is written to an image file; the object is drawn below).
venn.plot <- venn.diagram(
  x = gene_sets,
  category.names = names(gene_sets),
  filename = NULL,
  output = TRUE,
  main = "DEGs comparison btw annotations",
  main.cex = 1,                # title text size
  fill = c("red", "blue", "green"),
  cex = 1.5,                   # size of the counts inside the diagram
  cat.cex = 0.8,               # size of the set labels
  cat.dist = rep(0.04, 3),     # label distance from each circle
  cat.pos = c(-20, 20, 180)    # label angles: two near the top, third at the bottom
)

# Draw the diagram on a fresh grid page.
grid.newpage()
grid.draw(venn.plot)

In [0]:
# Gene names found in exactly one of the three DEG sets.
only_undiff_kc_genes <- with(gene_sets, setdiff(Undiff_KC, union(Diff_KC, KC)))
only_diff_kc_genes <- with(gene_sets, setdiff(Diff_KC, union(Undiff_KC, KC)))
only_kc_genes <- with(gene_sets, setdiff(KC, union(Undiff_KC, Diff_KC)))

# Print the three exclusive gene lists.
print(only_undiff_kc_genes)
print(only_diff_kc_genes)
print(only_kc_genes)

In [0]:
# Full DEG rows (not just names) for genes that are significant in only the
# undifferentiated or only the differentiated KC set.
only_undiff_kc <- undif_kc_filt %>%
  filter(!(gene %in% dif_kc_filt$gene | gene %in% kc_filt$gene))
only_diff_kc <- dif_kc_filt %>%
  filter(!(gene %in% undif_kc_filt$gene | gene %in% kc_filt$gene))

In [0]:
# Number of rows and columns in the undifferentiated-KC-only DEG table.
dim(only_undiff_kc)

In [0]:
# Number of rows and columns in the differentiated-KC-only DEG table.
dim(only_diff_kc)

In [0]:
# Export the set-exclusive DEG tables. Any file already present at the target
# path is deleted first, then the table is written.
undiff_only_xlsx <- "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/DEGs/genes_only_undiff_kc.xlsx"
if (file.exists(undiff_only_xlsx)) {
  file.remove(undiff_only_xlsx)
}
write.xlsx(only_undiff_kc, file = undiff_only_xlsx)

diff_only_xlsx <- "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/DEGs/genes_only_dif_kc.xlsx"
if (file.exists(diff_only_xlsx)) {
  file.remove(diff_only_xlsx)
}
write.xlsx(only_diff_kc, file = diff_only_xlsx)

####Common pathways between the different annotations

In [0]:
# Load the Reactome GSEA result tables: undifferentiated and differentiated
# KC from the celltypist results folder, and KC from the earlier merged_AR
# results folder.
undif_kc_p <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/GSEA/undif_kc_reactome_genes.xlsx"
)
dif_kc_p <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/GSEA/dif_kc_reactome_genes.xlsx"
)
kc_p <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_results/GSEA/kc_reactome_genes.xlsx"
)

In [0]:
# Largest adjusted p-value in the differentiated-KC pathway table.
# NOTE(review): presumably a check that the saved table was already
# thresholded upstream - confirm; the result is NA if p.adjust contains NA.
max(dif_kc_p$p.adjust)

In [0]:
# Pathway IDs per annotation; NA IDs are removed.
pathway_sets <- list(
  Undiff_KC = na.omit(undif_kc_p$ID),
  Diff_KC = na.omit(dif_kc_p$ID),
  KC = na.omit(kc_p$ID)
)

# Build the three-set Venn diagram as a grid object (filename = NULL means
# nothing is written to an image file; the object is drawn below).
venn.plot <- venn.diagram(
  x = pathway_sets,
  category.names = names(pathway_sets),
  filename = NULL,
  output = TRUE,
  main = "Pathway comparison btw annotations",
  main.cex = 1,                # title text size
  fill = c("red", "blue", "green"),
  cex = 1.5,                   # size of the counts inside the diagram
  cat.cex = 0.8,               # size of the set labels
  cat.dist = rep(0.04, 3),     # label distance from each circle
  cat.pos = c(-20, 20, 180)    # label angles: two near the top, third at the bottom
)

# Draw the diagram on a fresh grid page.
grid.newpage()
grid.draw(venn.plot)

In [0]:
# Pathway rows whose ID appears in only the undifferentiated or only the
# differentiated KC result table.
only_undiff_kc_p <- undif_kc_p %>%
  filter(!(ID %in% dif_kc_p$ID | ID %in% kc_p$ID))
only_diff_kc_p <- dif_kc_p %>%
  filter(!(ID %in% undif_kc_p$ID | ID %in% kc_p$ID))

In [0]:
# Number of rows and columns in the undifferentiated-KC-only pathway table.
dim(only_undiff_kc_p)

In [0]:
# Number of rows and columns in the differentiated-KC-only pathway table.
dim(only_diff_kc_p)

In [0]:
# Export the set-exclusive pathway tables to the celltypist GSEA folder.
write.xlsx(
  x = only_undiff_kc_p,
  file = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/GSEA/pathways_only_undiff_kc.xlsx"
)
write.xlsx(
  x = only_diff_kc_p,
  file = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/GSEA/pathways_only_dif_kc.xlsx"
)

In [0]:
# Render the differentiated-KC-only pathway table. display() is not defined
# in this file; presumably it is the notebook environment's table viewer.
display(only_diff_kc_p)

#Comparison between Reynolds and Merge

##DEGs

In [0]:
# Load the Reynolds-only KC DEG tables. Each object must be read from the
# file that matches its name (undif -> undif, dif -> dif), because the Venn
# diagrams further down label the sets by these object names.
undif_reynolds <- read.xlsx("/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/DEGs/only_reynolds_bulk_undif_kc_de.xlsx")
dif_reynolds <- read.xlsx("/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/DEGs/only_reynolds_bulk_dif_kc_de.xlsx")

In [0]:
# Load the Alkon-only KC DEG tables (undifferentiated and differentiated).
undif_alkon <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/DEGs/only_alkon_bulk_undif_kc_de.xlsx"
)
dif_alkon <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/DEGs/only_alkon_bulk_dif_kc_de.xlsx"
)

In [0]:
# Keep only rows with p_val_adj below 0.05 and an absolute avg_log2FC above 1,
# overwriting each per-dataset table with its filtered version.
dif_reynolds <- filter(dif_reynolds, p_val_adj < 0.05, abs(avg_log2FC) > 1)
undif_reynolds <- filter(undif_reynolds, p_val_adj < 0.05, abs(avg_log2FC) > 1)

dif_alkon <- filter(dif_alkon, p_val_adj < 0.05, abs(avg_log2FC) > 1)
undif_alkon <- filter(undif_alkon, p_val_adj < 0.05, abs(avg_log2FC) > 1)

In [0]:
# Undifferentiated-KC DEG names from each individual dataset and from the
# merged analysis; NA gene names are removed.
gene_sets <- list(
  Undiff_KC_Reynolds = na.omit(undif_reynolds$gene),
  Undiff_KC_Alkon = na.omit(undif_alkon$gene),
  Undiff_KC_AR = na.omit(undif_kc_filt$gene)
)

# Build the three-set Venn diagram as a grid object (filename = NULL means
# nothing is written to an image file; the object is drawn below).
venn.plot <- venn.diagram(
  x = gene_sets,
  category.names = names(gene_sets),
  filename = NULL,
  output = TRUE,
  main = "DEGs individual vs merged",
  main.cex = 1,                # title text size
  fill = c("red", "blue", "green"),
  cex = 1.5,                   # size of the counts inside the diagram
  cat.cex = 0.8,               # size of the set labels
  cat.dist = rep(0.04, 3),     # label distance from each circle
  cat.pos = c(-20, 20, 180)    # label angles: two near the top, third at the bottom
)

# Draw the diagram on a fresh grid page.
grid.newpage()
grid.draw(venn.plot)

In [0]:
# Differentiated-KC DEG names from each individual dataset and from the
# merged analysis; NA gene names are removed.
gene_sets <- list(
  Diff_KC_Reynolds = na.omit(dif_reynolds$gene),
  Diff_KC_Alkon = na.omit(dif_alkon$gene),
  Diff_KC_AR = na.omit(dif_kc_filt$gene)
)

# Build the three-set Venn diagram as a grid object (filename = NULL means
# nothing is written to an image file; the object is drawn below).
venn.plot <- venn.diagram(
  x = gene_sets,
  category.names = names(gene_sets),
  filename = NULL,
  output = TRUE,
  main = "DEGs individual vs merged",
  main.cex = 1,                # title text size
  fill = c("red", "blue", "green"),
  cex = 1.5,                   # size of the counts inside the diagram
  cat.cex = 0.8,               # size of the set labels
  cat.dist = rep(0.04, 3),     # label distance from each circle
  cat.pos = c(-20, 20, 180)    # label angles: two near the top, third at the bottom
)

# Draw the diagram on a fresh grid page.
grid.newpage()
grid.draw(venn.plot)

##Pathways

In [0]:
# Load the second sheet of each Reynolds-only GSEA workbook (differentiated
# and undifferentiated KC).
dif_reynolds_p <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/Reynolds/LvsHC/GSEA_individually/pseudobulk/res_dif_kc_0.05_reynolds.xlsx",
  sheet = 2
)
undif_reynolds_p <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/Reynolds/LvsHC/GSEA_individually/pseudobulk/res_undif_kc_0.05_reynolds.xlsx",
  sheet = 2
)

In [0]:
# Keep only pathways with an adjusted p-value below 0.05.
# which() is used so that rows whose p.adjust is NA are dropped; plain
# logical indexing would instead return an all-NA row for each of them and
# inflate the row counts reported by dim().
dif_reynolds_p <- dif_reynolds_p[which(dif_reynolds_p$p.adjust < 0.05), ]
undif_reynolds_p <- undif_reynolds_p[which(undif_reynolds_p$p.adjust < 0.05), ]

In [0]:
# Rows and columns left in the Reynolds differentiated-KC pathway table
# after the p.adjust < 0.05 subset.
dim(dif_reynolds_p)

In [0]:
# Rows and columns left in the Reynolds undifferentiated-KC pathway table
# after the p.adjust < 0.05 subset.
dim(undif_reynolds_p)

In [0]:
# Load the second sheet of each Alkon-only GSEA workbook (differentiated and
# undifferentiated KC).
dif_alkon_p <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/Alkon/GSEA_individually/pseudobulk/res_dif_kc_0.05_alkon.xlsx",
  sheet = 2
)
undif_alkon_p <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/Alkon/GSEA_individually/pseudobulk/res_undif_kc_0.05_alkon.xlsx",
  sheet = 2
)

In [0]:
# Keep only pathways with an adjusted p-value below 0.05.
# which() is used so that rows whose p.adjust is NA are dropped; plain
# logical indexing would instead return an all-NA row for each of them and
# inflate the row counts reported by dim().
dif_alkon_p <- dif_alkon_p[which(dif_alkon_p$p.adjust < 0.05), ]
undif_alkon_p <- undif_alkon_p[which(undif_alkon_p$p.adjust < 0.05), ]

In [0]:
# Rows and columns left in the Alkon differentiated-KC pathway table after
# the p.adjust < 0.05 subset.
dim(dif_alkon_p)

In [0]:
# Rows and columns left in the Alkon undifferentiated-KC pathway table after
# the p.adjust < 0.05 subset.
dim(undif_alkon_p)

In [0]:
# Size of the merged-analysis undifferentiated-KC pathway table, for
# comparison with the individual datasets.
dim(undif_kc_p)

In [0]:
# Size of the merged-analysis differentiated-KC pathway table, for
# comparison with the individual datasets.
dim(dif_kc_p)

When each dataset is analysed individually, no terms are significantly enriched, whereas the merged analysis yields 29 enriched pathways for undifferentiated KC and 130 for differentiated KC.

#Tcells

####Common DEGs between the different annotations

In [0]:
# Load the T cell DEG tables: T cell and Th from the celltypist results
# folder, and T cell from the earlier merged_AR results folder.
tcell <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results//DEGs/bulk.tcell.de.cov.xlsx"
)
th <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/DEGs/bulk.th.de.cov.xlsx"
)
tcell_prev <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_results/adjusting_cov_dataset_DEGs/bulk.tcell.de.cov.xlsx"
)

In [0]:
# Show the unfiltered Th DEG table for visual inspection.
th

In [0]:
# Keep only rows with padj below 0.05 and an absolute log2 fold change
# above 1, overwriting each table with its filtered version.
tcell <- filter(tcell, padj < 0.05, abs(log2FoldChange) > 1)
th <- filter(th, padj < 0.05, abs(log2FoldChange) > 1)
tcell_prev <- filter(tcell_prev, padj < 0.05, abs(log2FoldChange) > 1)

In [0]:
# Show the filtered T cell DEG table of the previous annotation.
tcell_prev

In [0]:
# Gene names of the filtered T cell DEGs per annotation; NA gene names are
# removed.
gene_sets <- list(
  Tcell_prev = na.omit(tcell_prev$gene),
  Th_Celltypist = na.omit(th$gene),
  Tcell_Celltypist = na.omit(tcell$gene)
)

# Build the three-set Venn diagram as a grid object (filename = NULL means
# nothing is written to an image file; the object is drawn below).
venn.plot <- venn.diagram(
  x = gene_sets,
  category.names = names(gene_sets),
  filename = NULL,
  output = TRUE,
  main = "DEGs comparison btw annotations",
  main.cex = 1,                # title text size
  fill = c("red", "blue", "green"),
  cex = 1.5,                   # size of the counts inside the diagram
  cat.cex = 0.8,               # size of the set labels
  cat.dist = rep(0.04, 3),     # label distance from each circle
  cat.pos = c(-20, 20, 180)    # label angles: two near the top, third at the bottom
)

# Draw the diagram on a fresh grid page.
grid.newpage()
grid.draw(venn.plot)

In [0]:
# Up-regulated DEGs exclusive to each T cell annotation.

# Restrict each DEG table to rows with a positive log2FoldChange.
tcell_prev_positive <- tcell_prev[with(tcell_prev, log2FoldChange > 0), ]
th_positive <- th[with(th, log2FoldChange > 0), ]
tcell_positive <- tcell[with(tcell, log2FoldChange > 0), ]

# Gene names of the up-regulated rows; NA gene names are removed.
gene_sets_positive <- list(
  Tcell_prev = na.omit(tcell_prev_positive$gene),
  Th_Celltypist = na.omit(th_positive$gene),
  Tcell_Celltypist = na.omit(tcell_positive$gene)
)

# Genes that are up-regulated in exactly one of the three sets.
unique_tcell_prev <- with(
  gene_sets_positive,
  setdiff(Tcell_prev, union(Th_Celltypist, Tcell_Celltypist))
)
unique_th <- with(
  gene_sets_positive,
  setdiff(Th_Celltypist, union(Tcell_prev, Tcell_Celltypist))
)
unique_tcell <- with(
  gene_sets_positive,
  setdiff(Tcell_Celltypist, union(Tcell_prev, Th_Celltypist))
)

# Print the three exclusive gene lists.
print(unique_tcell_prev)
print(unique_th)
print(unique_tcell)

In [0]:
# Print the up-regulated genes exclusive to the celltypist T cell set on
# their own (this list is also printed at the end of the cell that builds it).
print(unique_tcell)


####Common pathways between the different annotations

In [0]:
# Load the T cell Reactome GSEA tables.
# NOTE(review): `tcell_prev` is reused here, so from this point on it holds
# the previous-annotation pathway table and no longer the DEG table loaded
# earlier; re-running the DEG cells afterwards requires reloading it.
tcell_prev <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_results/GSEA/tcell_reactome_genes.xlsx"
)
tcell_reactome <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/GSEA/tcell_reactome_genes.xlsx"
)
th_reactome <- read.xlsx(
  xlsxFile = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/GSEA/th_reactome_genes.xlsx"
)

In [0]:
# Largest adjusted p-value in the celltypist T cell pathway table.
# NOTE(review): presumably a check that the saved table was already
# thresholded upstream - confirm; the result is NA if p.adjust contains NA.
max(tcell_reactome$p.adjust)

In [0]:
# Pathway IDs per T cell annotation; NA IDs are removed.
pathway_sets <- list(
  Tcell_Prev = na.omit(tcell_prev$ID),
  Th_Celltypist = na.omit(th_reactome$ID),
  Tcell_Celltypist = na.omit(tcell_reactome$ID)
)

# Build the three-set Venn diagram as a grid object (filename = NULL means
# nothing is written to an image file; the object is drawn below).
venn.plot <- venn.diagram(
  x = pathway_sets,
  category.names = names(pathway_sets),
  filename = NULL,
  output = TRUE,
  main = "Pathway comparison btw annotations",
  main.cex = 1,                # title text size
  fill = c("red", "blue", "green"),
  cex = 1.5,                   # size of the counts inside the diagram
  cat.cex = 0.8,               # size of the set labels
  cat.dist = rep(0.04, 3),     # label distance from each circle
  cat.pos = c(-20, 20, 180)    # label angles: two near the top, third at the bottom
)

# Draw the diagram on a fresh grid page.
grid.newpage()
grid.draw(venn.plot)

In [0]:
# Pathway rows whose ID appears in exactly one of the three T cell tables.
only_Tcell_Prev <- tcell_prev %>%
  filter(!(ID %in% th_reactome$ID | ID %in% tcell_reactome$ID))
only_Th_Celltypist <- th_reactome %>%
  filter(!(ID %in% tcell_prev$ID | ID %in% tcell_reactome$ID))
only_Tcell_Celltypist <- tcell_reactome %>%
  filter(!(ID %in% tcell_prev$ID | ID %in% th_reactome$ID))

In [0]:
# Rows and columns of the pathways found only in the previous T cell table.
dim(only_Tcell_Prev)

In [0]:
# Rows and columns of the pathways found only in the celltypist Th table.
dim(only_Th_Celltypist)

In [0]:
# Rows and columns of the pathways found only in the celltypist T cell table.
dim(only_Tcell_Celltypist)

In [0]:
# Export the set-exclusive T cell pathway tables to the celltypist GSEA folder.
write.xlsx(
  x = only_Tcell_Celltypist,
  file = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/GSEA/pathways_only_tcell_celltypist.xlsx"
)
write.xlsx(
  x = only_Th_Celltypist,
  file = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/GSEA/pathways_only_th_celltypist.xlsx"
)
write.xlsx(
  x = only_Tcell_Prev,
  file = "/dbfs/mnt/sandbox/TFM_PAULA/merged_AR_celltypist_results/GSEA/pathways_only_tcell_prev.xlsx"
)


In [0]:
# Render a table with display() (not defined in this file).
# NOTE(review): this shows `only_diff_kc_p`, the KC pathway table, although
# it sits at the end of the T cell section - it looks like a leftover from
# the KC section; confirm whether one of only_Tcell_Prev, only_Th_Celltypist
# or only_Tcell_Celltypist was intended.
display(only_diff_kc_p)