# Get pass-QC barcodes for later chromVAR analysis

## 0. Imports

In [None]:
library(Signac)
library(Seurat)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Mmusculus.UCSC.mm10)
library(patchwork)

## 1. Paths

In [None]:
# Root of the project data tree (the trailing slash is kept as-is).
master_data_dir <- "/bap/bap/collab_asthma_multiome/"

# Directory holding the outputs of the combined ATAC preprocessing step.
preprocessing_dir <- file.path(
  master_data_dir,
  "outputs", "ATAC", "1_Combined_Preprocessing_Outputs"
)

In [None]:
# Destination for the per-condition barcode lists written by this notebook.
output_dir <- file.path(
  master_data_dir,
  "outputs", "ATAC", "2_Analysis_Outputs", "1a_ChromVAR_Inputs"
)

# Create the destination (and any missing parents) only when it is absent.
if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE)

In [None]:
# Display the resolved output directory as a visual sanity check.
output_dir

## 2. Load object

In [None]:
# Read the serialized object named "NeuronsOnly_Filtered" from the
# preprocessing output directory.
multiome.combined.neurons <- readRDS(
  file.path(
    preprocessing_dir,
    "Asthma_Multiome_Combined_NeuronsOnly_Filtered.rds"
  )
)

# Show the object summary.
multiome.combined.neurons

## 3. Get valid barcodes

### 3.1 Create another metadata column for label+condition

In [None]:
# Display the per-cell metadata table to inspect the available columns.
multiome.combined.neurons@meta.data

In [None]:
# Build a combined "<predicted cluster>_<condition>" label for every cell and
# store it as a new metadata column.
multiome.combined.neurons@meta.data$predicted.cluster.condition <- with(
  multiome.combined.neurons@meta.data,
  paste(predicted.predicted_clusters, condition, sep = "_")
)

In [None]:
# List the distinct cluster+condition labels produced by the paste() above.
unique(multiome.combined.neurons@meta.data$predicted.cluster.condition)

### 3.2 Extract valid barcodes per condition

In [None]:
# Peek at the first few entries of the object's identities (Idents()).
head(Idents(multiome.combined.neurons))

In [None]:
#' Write the cell barcodes of one condition to a text file
#'
#' Subsets `seurat_obj` to the cells whose `condition` metadata value equals
#' `sample_condition`, takes the row names of the subset's `meta.data`, keeps
#' only the text after the last "_" of each name, and writes the result (one
#' barcode per line, no quotes or header) to
#' `<output_path>/<sample_name>_valid_barcodes.txt`.
#'
#' @param seurat_obj Object supporting `subset(x, subset = ...)` and exposing a
#'   `meta.data` slot with a `condition` column.
#' @param sample_condition Single value compared against the `condition`
#'   metadata column.
#' @param sample_name Single string used as the prefix of the output file name.
#' @param output_path Single string; directory that receives the file. It is
#'   created (recursively) when it does not exist yet.
#' @return The subsetted object (the input object itself is not modified).
extract_valid_barcodes <- function(seurat_obj, sample_condition, sample_name, output_path) {
  # A vector-valued condition would be silently recycled inside `==`, and a
  # vector-valued name/path would yield several output files; reject early.
  stopifnot(
    length(sample_condition) == 1L, !is.na(sample_condition),
    is.character(sample_name), length(sample_name) == 1L, !is.na(sample_name),
    is.character(output_path), length(output_path) == 1L, !is.na(output_path)
  )

  # Subset the object based on the provided condition
  sample_subset <- subset(seurat_obj, subset = condition == sample_condition)

  # Cell names are the row names of the metadata table
  valid_barcodes <- rownames(sample_subset@meta.data)

  # Strip the sample prefix by splitting at "_" and taking the last element.
  # For example, "PBS_C_TTTGTTGGTCAAAGGG-1" becomes "TTTGTTGGTCAAAGGG-1".
  # vapply() guarantees a character vector (sapply() may return a list).
  valid_barcodes_stripped <- vapply(
    strsplit(valid_barcodes, "_", fixed = TRUE),
    function(parts) tail(parts, n = 1L),
    character(1)
  )

  # Make sure the destination exists so the write below cannot fail on a
  # missing directory.
  if (!dir.exists(output_path)) {
    dir.create(output_path, recursive = TRUE)
  }

  # Output file: <sample_name>_valid_barcodes.txt
  out_file <- file.path(output_path, paste0(sample_name, "_valid_barcodes.txt"))

  # One barcode per line, without quotes, row names, or column names
  writeLines(valid_barcodes_stripped, con = out_file)

  sample_subset
}

In [None]:
# Export the barcodes of each condition. Every call writes
# <output_dir>/<sample_name>_valid_barcodes.txt and keeps the subsetted
# object under its own name.

# Sample 1: NT
multiome.combined.neurons.NT <- extract_valid_barcodes(
  multiome.combined.neurons,
  sample_condition = "NT", sample_name = "NT", output_path = output_dir
)

# Sample 2: PBS
multiome.combined.neurons.PBS <- extract_valid_barcodes(
  multiome.combined.neurons,
  sample_condition = "PBS", sample_name = "PBS", output_path = output_dir
)

# Sample 3: OVA
multiome.combined.neurons.OVA <- extract_valid_barcodes(
  multiome.combined.neurons,
  sample_condition = "OVA", sample_name = "OVA", output_path = output_dir
)

# Sample 4: PBS_Chase (condition label "PBS_C")
multiome.combined.neurons.PBS_Chase <- extract_valid_barcodes(
  multiome.combined.neurons,
  sample_condition = "PBS_C", sample_name = "PBS_C", output_path = output_dir
)

# Sample 5: OVA_Chase (condition label "OVA_C")
multiome.combined.neurons.OVA_Chase <- extract_valid_barcodes(
  multiome.combined.neurons,
  sample_condition = "OVA_C", sample_name = "OVA_C", output_path = output_dir
)

In [None]:
# Print the summary of every per-condition subset, in this fixed order:
# NT, OVA_Chase, OVA, PBS_Chase, PBS.
invisible(lapply(
  list(
    multiome.combined.neurons.NT,
    multiome.combined.neurons.OVA_Chase,
    multiome.combined.neurons.OVA,
    multiome.combined.neurons.PBS_Chase,
    multiome.combined.neurons.PBS
  ),
  print
))


In [None]:
# Completion marker: shows that every cell above ran to the end.
print("Done")

# END