In [1]:
suppressMessages(library(ArchR))
suppressMessages(library(SummarizedExperiment))

ss <- function(x, pattern, slot = 1, ...) { sapply(strsplit(x = x, split = pattern, ...), '[', slot) }
options(stringsAsFactors = F)
options(repr.plot.width=14, repr.plot.height=8.5)

In [2]:
##################################
### set Arrow File parameters ####
addArchRThreads(threads = 10)
addArchRGenome("mm10")

Setting default number of Parallel threads to 10.

Setting default genome to Mm10.



In [3]:
PROJDIR='../../../data/raw_data/mm10/BICCN_mouse_caudoputamen'
ARCHDIR=file.path(PROJDIR,'ArchR_BICCN_CP_labeled')
proj = loadArchRProject(ARCHDIR)

Successfully loaded ArchRProject!


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _ 

In [4]:
getAvailableMatrices(proj)
head(cellColData(proj))

ERROR: Error in cellColData(proj): could not find function "cellColData"


In [None]:
# add iterative LSI
proj <- addIterativeLSI(
  ArchRProj = proj, useMatrix = "OrthologPeakMatrix", 
  name = "OrthologIterativeLSI",
  LSIMethod = 2, #"tf-logidf", "log(tf-idf)", "logtf-logidf"
  iterations = 6, # increase this if noticing subtle batch effects
  scaleTo = 3000,
  selectionMethod = 'var',
  clusterParams = list( # See Seurat::FindClusters
    resolution = c(.1, .2, rep(.4, 3)), # lower this if noticing subtle batch effects
    sampleCells = 10000,  n.start = 10), 
  varFeatures = 150000, # also can reduce this if noticing subtle batch effects
  dimsToUse = 1:40, force = TRUE)

In [None]:
proj <- addHarmony( ArchRProj = proj, reducedDims = "OrthologIterativeLSI",
                    name = "OrthologHarmony", groupBy = "RegionName",force = TRUE)

proj = addUMAP(proj, name = "OrthologUMAP", force = TRUE,
               reducedDims = "OrthologHarmony")

In [None]:
## make UMAP plots
p1 <- plotEmbedding(ArchRProj = proj, colorBy = "cellColData", 
                    name = "Clusters2", embedding = "OrthologUMAP")

p2 <- plotEmbedding(ArchRProj = proj, colorBy = "cellColData", 
                    name = "RegionName", embedding = "OrthologUMAP")

p3 <- plotEmbedding(ArchRProj = proj, colorBy = "cellColData", 
                    name = "Sample", embedding = "OrthologUMAP")
ggAlignPlots(p1, p2, type = "h")
p1

In [None]:
proj = saveArchRProject(ArchRProj = proj)

# Load in mouse cSNAIL count matrix

In [None]:
PROJDIR2=file.path('../../../data/raw_data/mm10/Mouse_cSNAIL_D1D2')
cSNAILrse_fn = file.path(PROJDIR2,'rdas',
                        'multiSpeciesOrthologousPeaks_cSNAILfeatureCounts_RSE_n12_20201022.RDS')
rse = readRDS(file = cSNAILrse_fn)
rse

In [None]:
projected = ArchR::projectBulkATAC(
    ArchRProj = proj, seATAC = rse, 
    reducedDims = "OrthologIterativeLSI",
    embedding = "OrthologUMAP", n = 30) # how many cells to sample for each bulk profile

In [None]:
########################################
# get the single Cell UMAP coordinates #
singleCellUMAP = projected$singleCellUMAP
singleCellUMAP = cbind(singleCellUMAP, proj@cellColData)

############################
# get the bulk projections #
simulatedBulkUMAP = projected$simulatedBulkUMAP
simulatedBulkUMAP = simulatedBulkUMAP[sample(nrow(simulatedBulkUMAP)), ]
head(simulatedBulkUMAP)
simulatedBulkUMAP = cbind(simulatedBulkUMAP, colData(rse)[simulatedBulkUMAP$Type,])

In [None]:
ggplot(as.data.frame(singleCellUMAP), aes(x = UMAP1, y = UMAP2)) + 
  geom_point(color = 'gray',pch = 20) +
  geom_point(data = as.data.frame(simulatedBulkUMAP), 
             aes(x = UMAP1, y = UMAP2, color = Celltype, shape = Sample), 
             alpha = .5, size =5) +
    scale_shape_manual(values = LETTERS) +
  facet_grid(Tissue~Celltype) + theme_bw(base_size = 18) + 
  guides(colour = guide_legend(override.aes = list(shape = 20, alpha = 1, size = 3)))