# Parse combined multiome object to extract barcode:subtype mappings

## 0. Imports

In [1]:
library(Signac)
library(Seurat)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Mmusculus.UCSC.mm10)
library(patchwork)

Loading required package: SeuratObject

Loading required package: sp


Attaching package: ‘SeuratObject’


The following objects are masked from ‘package:base’:

    intersect, t




Loading required package: BSgenome

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following object is masked from ‘package:SeuratObject’:

    intersect


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which.max, which.min


Loading required package: S4Vectors

Loading required package: stats4


Attaching pa

## 1. Paths

In [2]:
# Root of the project data tree; every other path below is built from it.
master_data_dir <- "/bap/bap/collab_asthma_multiome/"

# Folder holding the combined ATAC preprocessing outputs (input to this notebook).
preprocessing_dir <- file.path(
  master_data_dir,
  "outputs", "ATAC", "1_Combined_Preprocessing_Outputs"
)

In [3]:
# Destination folder for the files written by this notebook.
output_dir <- file.path(
  master_data_dir,
  "outputs", "ATAC", "2_Analysis_Outputs", "1a_ChromVAR_Inputs"
)

# Create the folder (and any missing parents) only when it is not there yet.
output_dir_missing <- !dir.exists(output_dir)
if (output_dir_missing) {
  dir.create(output_dir, recursive = TRUE)
}

In [4]:
# Show the resolved output directory path.
output_dir

## 2. Load object

In [5]:
# Location of the serialized object inside the preprocessing folder.
neurons_rds_path <- file.path(
  preprocessing_dir,
  "Asthma_Multiome_Combined_NeuronsOnly_Filtered.rds"
)

# Read the object from disk, then display its summary.
multiome.combined.neurons <- readRDS(neurons_rds_path)
multiome.combined.neurons

An object of class Seurat 
362679 features across 7418 samples within 4 assays 
Active assay: RNA (32285 features, 2000 variable features)
 11 layers present: counts.1, counts.2, counts.3, counts.4, counts.5, data.1, data.2, data.3, data.4, data.5, scale.data
 3 other assays present: ATAC, ACTIVITY, prediction.score.predicted_clusters
 6 dimensional reductions calculated: lsi, umap.atac, pca, umap, ref.pca, ref.umap

## 3. Get valid barcodes

In [6]:
# Display the metadata table stored in the object's meta.data slot
# (rows are named by sample-prefixed barcode).
multiome.combined.neurons@meta.data

Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,percent.mt,nCount_ATAC,nFeature_ATAC,nucleosome_signal,nucleosome_percentile,TSS.enrichment,TSS.percentile,nCount_ACTIVITY,nFeature_ACTIVITY,RNA_snn_res.1,seurat_clusters,is_neuron,predicted.predicted_clusters.score,predicted.predicted_clusters,condition
Unnamed: 0_level_1,<chr>,<dbl>,<int>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<fct>,<fct>,<chr>,<dbl>,<chr>,<chr>
NT_AAACAGCCAGTTTCTC-1,SeuratProject,20861,5890,12.051196,15411,7129,0.6718566,0.50,5.194805,0.38,9977,5669,11,11,Neuron,0.9911108,NG4,NT
NT_AAACGGATCAATAGCC-1,SeuratProject,8670,3712,4.094579,29415,13222,0.5606177,0.17,5.040155,0.32,18037,8349,10,10,Neuron,0.6383038,JG6,NT
NT_AAACGGATCCTAATGA-1,SeuratProject,13062,4458,11.598530,15723,7369,0.5985654,0.27,5.370761,0.45,9744,5883,9,9,Neuron,0.5937154,JG1,NT
NT_AAACGTACAAAGCCTC-1,SeuratProject,30314,6358,18.595368,471,259,0.6965517,0.58,5.314685,0.42,343,336,5,5,Neuron,0.6754461,NG5,NT
NT_AAACGTACATGTGGGA-1,SeuratProject,40155,7478,7.077574,81748,33487,0.6739231,0.50,5.022594,0.31,49146,12791,9,9,Neuron,0.7783772,JG1,NT
NT_AAAGCAAGTTAGGTGC-1,SeuratProject,21482,5612,16.283400,38207,16676,0.7199309,0.64,5.759458,0.61,22749,9782,1,1,Neuron,0.7280229,NG11,NT
NT_AAAGGCTCAATAGTCT-1,SeuratProject,57340,9033,7.683990,94924,32813,0.8366753,0.87,5.805339,0.62,58394,13527,18,18,Neuron,1.0000000,NG19,NT
NT_AAAGGCTCAGAAATTG-1,SeuratProject,16922,5631,10.997518,20540,9344,0.6294776,0.37,7.263235,0.91,13428,7247,29,29,Neuron,1.0000000,NG25,NT
NT_AAAGGTTAGTCAATTG-1,SeuratProject,45116,7599,19.538523,77515,30585,0.8026838,0.82,5.263025,0.40,46287,12908,1,1,Neuron,0.4578353,NG11,NT
NT_AAATCCGGTGAGCGAA-1,SeuratProject,25831,6662,8.710464,38231,15911,0.7747962,0.77,6.768989,0.86,23723,9722,13,13,Neuron,1.0000000,NG9,NT


In [7]:
# List the column names available in the metadata table.
colnames(multiome.combined.neurons@meta.data)

### 3.1 Create mapping dataframe between sample-prepended barcode and subtype

In [8]:
# Pull the two pieces needed for the mapping out of the metadata table:
# its row names (sample-prefixed barcodes) and the predicted.predicted_clusters
# column (predicted subtype labels).
neuron_meta <- multiome.combined.neurons@meta.data

sample_bc_names <- rownames(neuron_meta)
predicted_subtypes <- neuron_meta[["predicted.predicted_clusters"]]



In [9]:
# Assemble the two-column mapping: one row per barcode with its subtype label.
barcode_cluster_df <- data.frame(
  Barcode = sample_bc_names,
  Cluster = predicted_subtypes
)

In [10]:
# Display the barcode-to-cluster mapping table.
barcode_cluster_df

Barcode,Cluster
<chr>,<chr>
NT_AAACAGCCAGTTTCTC-1,NG4
NT_AAACGGATCAATAGCC-1,JG6
NT_AAACGGATCCTAATGA-1,JG1
NT_AAACGTACAAAGCCTC-1,NG5
NT_AAACGTACATGTGGGA-1,JG1
NT_AAAGCAAGTTAGGTGC-1,NG11
NT_AAAGGCTCAATAGTCT-1,NG19
NT_AAAGGCTCAGAAATTG-1,NG25
NT_AAAGGTTAGTCAATTG-1,NG11
NT_AAATCCGGTGAGCGAA-1,NG9


## 4. Save as CSV file

In [11]:
# Write the mapping table to the output folder as CSV, without row names.
mapping_csv_path <- file.path(
  output_dir,
  "sample_barcode_predicted_cluster_df.csv"
)
write.csv(barcode_cluster_df, file = mapping_csv_path, row.names = FALSE)

In [12]:
# Signal that the notebook ran to completion.
print("Done")

[1] "Done"


# END