<a href="https://colab.research.google.com/github/nayoungku1/demuxly/blob/main/multi_sample_trial/demux_version_comparison.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ================================
# Cell Ranger Version Comparison Analysis Example
# ================================

library(data.table)
library(dplyr)
library(ggplot2)
library(reshape2)
library(pheatmap)

# -------------------------------
# 1. Set file paths for each version
# -------------------------------
# Cell Ranger version labels to compare; each label is appended to
# `base_path` to locate that version's output folder.
versions <- paste0("v", 6:9)

# Shared prefix of the per-version output folders (not a complete path on its
# own). Modify to match actual path.
base_path <- "PBMC_10k_demux_"

# -------------------------------
# 2. Read assignment_confidence_table.csv
# -------------------------------
# Read each version's assignment confidence table, tag every row with the
# version label it came from, and keep the tables in a list keyed by version.
assignment_list <- lapply(
  setNames(versions, versions),
  function(ver) {
    tbl <- fread(paste0(base_path, ver, "/outs/multi/multiplexing_analysis/assignment_confidence_table.csv"))
    tbl$version <- ver
    tbl
  }
)

# Stack the per-version tables into one long table.
all_assignments <- bind_rows(assignment_list)

# -------------------------------
# 3. Compare Singlet/Multiplet/Negative Ratios
# -------------------------------
# Count cells per (version, assignment) pair, then express each count as a
# percentage of that version's total so versions can be compared directly.
# NOTE(review): assumes the combined table has a column named `assignment` --
# confirm against the CSV header.
qc_summary <- all_assignments %>%
  group_by(version, assignment) %>%
  # Drop only the `assignment` level, and do it explicitly: the percentages
  # below must be computed within each version. Being explicit also avoids the
  # regrouping message summarise() prints when `.groups` is left unset.
  summarise(n_cells = n(), .groups = "drop_last") %>%
  mutate(percentage = n_cells / sum(n_cells) * 100) %>%
  # Hand back an ungrouped table so later verbs do not inherit the grouping.
  ungroup()

# Stacked bar chart: one bar per Cell Ranger version, split by assignment
# category, with bar heights taken directly from the precomputed percentages.
ggplot(qc_summary, aes(x = version, y = percentage, fill = assignment)) +
  geom_col(position = "stack") +
  labs(
    x = "Cell Ranger Version",
    y = "Percentage of Cells",
    title = "Demultiplexing Assignment Comparison"
  ) +
  theme_minimal()

# -------------------------------
# 4. Read and compare tag_calls_summary.csv
# -------------------------------
# Read each version's tag calls summary, tag every row with the version label
# it came from, and keep the tables in a list keyed by version.
summary_list <- lapply(
  setNames(versions, versions),
  function(ver) {
    tbl <- fread(paste0(base_path, ver, "/outs/multi/multiplexing_analysis/tag_calls_summary.csv"))
    tbl$version <- ver
    tbl
  }
)

# Stack the per-version summaries into one long table.
all_summary <- bind_rows(summary_list)

# Example: dodged bar chart of the average tag counts per sample, with one bar
# per version placed side by side.
# NOTE(review): assumes the summary table has `sample` and `avg_umi` columns --
# confirm against the CSV header.
ggplot(all_summary, aes(x = sample, y = avg_umi, fill = version)) +
  geom_col(position = "dodge") +
  labs(
    x = "Sample",
    y = "Average Tag Counts per Cell",
    title = "HTO/CMO Average Counts Comparison Across Versions"
  ) +
  theme_minimal() +
  # Tilt the x-axis labels by 45 degrees and right-align them.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# -------------------------------
# 5. Compare HTO/CMO counts heatmap
# -------------------------------
# Example: one heatmap per version, built from the first 50 rows of its
# per-cell tag table (rows are taken in file order; they are not ranked).
for (v in versions){
  file_path <- paste0(base_path, v, "/outs/multi/multiplexing_analysis/tag_calls_per_cell.csv")
  df <- fread(file_path)

  # Nothing to draw for an empty table; report it and move to the next version.
  if (nrow(df) == 0) {
    warning("No rows in ", file_path, "; skipping heatmap for ", v,
            call. = FALSE)
    next
  }

  # head() stops at the end of the table, whereas indexing a data.table with
  # 1:50 pads a shorter table with all-NA rows that then break the clustering.
  top_cells <- head(df, 50)

  # Everything except the first column becomes the heatmap matrix.
  # NOTE(review): assumes column 1 is the cell identifier and the remaining
  # columns are numeric counts -- confirm against the CSV header.
  mat <- as.matrix(top_cells[, -1, with=FALSE])
  rownames(mat) <- top_cells$barcode

  # Row clustering needs at least two rows; columns keep their file order.
  pheatmap(mat,
           cluster_rows=nrow(mat) > 1,
           cluster_cols=FALSE,
           main=paste("HTO/CMO Counts Heatmap -", v))
}

# -------------------------------
# 6. Compare Singlet Cells (Optional)
# -------------------------------
# For each version, keep the rows whose assignment is "Singlet" and write their
# barcodes to singlets_<version>.csv in the working directory.
# NOTE(review): assumes columns named `assignment` and `barcode` and a
# "Singlet" label in the assignment table -- confirm against the CSV.
for (v in versions) {
  singlets <- filter(assignment_list[[v]], assignment == "Singlet")
  write.csv(
    singlets$barcode,
    file = paste0("singlets_", v, ".csv"),
    row.names = FALSE
  )
}