### Installation

In [1]:
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install("chromVAR", version = "3.8")
# BiocManager::install("motifmatchr", version = "3.8")
# BiocManager::install("BSgenome.Hsapiens.UCSC.hg19", version = "3.8")
# BiocManager::install("JASPAR2016", version = "3.8")

###  Import packages

In [2]:
library(chromVAR)
library(motifmatchr)
library(Matrix)
library(SummarizedExperiment)
library(BiocParallel)
library('JASPAR2016')
library(BSgenome.Hsapiens.UCSC.hg19)


Loading required package: GenomicRanges
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from ‘package:Matrix’:

    colMeans, colSums, rowMeans, rowSums, which

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colMeans,
    colnames, colSums, dirname, do.call, duplicated, eval, evalq,
    Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply,
    lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int,
    pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, r

In [3]:
# Record which chromVAR release this notebook was run with.
packageVersion(pkg = "chromVAR")

[1] ‘1.4.1’

In [4]:
# Make a 10-worker multicore backend the default BiocParallel backend.
register(MulticoreParam(workers = 10))

### Obtain Feature Matrix

In [5]:
# Wall-clock timestamp taken before the feature matrix is built; subtracted
# from `end_time` further down to report the total run time.
start_time <- Sys.time()

In [6]:
# Fix the random number generator state so stochastic steps are repeatable.
set.seed(seed = 2019L)

In [7]:
# Load the per-cell metadata table; the first column supplies the row names.
# Quote handling is switched off (quote = "") and strings stay as characters.
# NOTE(review): the default field separator (any whitespace) is used even
# though the file is named .tsv -- confirm no field contains spaces.
metadata <- read.table(
  "../../input/metadata.tsv",
  header = TRUE,
  row.names = 1,
  quote = "",
  stringsAsFactors = FALSE
)

In [8]:
# Path of the peak set (BED file) and the peaks read from it, sorted on load.
peakfile <- "../../input/GSE96769_PeakFile_20160207.bed"
peaks <- getPeaks(filename = peakfile, sort_peaks = TRUE)

Peaks sorted


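Note: resizing the peaks to `width = 500` fails with `Error in loadFUN(x, seqname, ranges): trying to load regions beyond the boundaries of non-circular sequence "chr17"`, i.e. a 500 bp window reaches past the end of a chromosome. The peaks are therefore resized to 450 bp instead.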

In [9]:
# Give every peak the same fixed width, keeping its centre in place.
# 450 bp is used instead of 500 bp because width = 500 triggers the
# "trying to load regions beyond the boundaries" error on chr17.
peaks <- resize(peaks, width = 450, fix = "center")

In [10]:
# File names (without the directory part) of everything ending in ".bam"
# inside the single-cell BAM directory.
bamfile <- list.files(
  "../../input/sc-bams_nodup/",
  pattern = "\\.bam$",
  full.names = FALSE
)

In [11]:
# Number of BAM files found.
length(bamfile)

In [12]:
# Derive one cell name per BAM file by keeping everything before the first
# "." of the file name (e.g. "X.dedup.st.bam" -> "X").
# vapply() replaces sapply() so the result is guaranteed to be a character
# vector -- sapply() silently returns an empty list when `bamfile` is empty.
cellnames <- vapply(
  strsplit(bamfile, ".", fixed = TRUE),
  function(parts) parts[[1]],
  character(1)
)

In [13]:
# Show the first few derived cell names.
head(cellnames)

In [14]:
# Sanity check: count the positions at which the cell name equals the
# corresponding row name of `metadata`. The count matches length(cellnames)
# only when both are in the same order.
sum(cellnames == rownames(metadata))

In [None]:
# Count paired-end fragments per peak for every BAM file found above.
# The `celltype` column of the sample annotation is filled with the cell
# names at this point.
# NOTE(review): by_rg = TRUE tells chromVAR to separate samples by read-group
# tag -- confirm this is intended for one-BAM-per-cell input.
fragment_counts <- getCounts(
  paste0("../../input/sc-bams_nodup/", bamfile),
  peaks,
  format = "bam",
  paired = TRUE,
  by_rg = TRUE,
  colData = data.frame(celltype = cellnames)
)

Reading in file: ../../input/sc-bams_nodup/BM1077-CLP-Frozen-160106-13.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CLP-Frozen-160106-14.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CLP-Frozen-160106-2.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CLP-Frozen-160106-21.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CLP-Frozen-160106-27.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CLP-Frozen-160106-3.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CLP-Frozen-160106-36.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CLP-Frozen-160106-42.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CLP-Frozen-160106-44.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CLP-Frozen-160106-50.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CLP-Frozen-160106-61.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CLP-Frozen-160106-62.dedup.s

Reading in file: ../../input/sc-bams_nodup/BM1077-CMP-Frozen-160106-89.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CMP-Frozen-160106-91.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CMP-Frozen-160106-92.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-CMP-Frozen-160106-95.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-GMP-Frozen-160107-1.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-GMP-Frozen-160107-10.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-GMP-Frozen-160107-11.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-GMP-Frozen-160107-14.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-GMP-Frozen-160107-16.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-GMP-Frozen-160107-20.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-GMP-Frozen-160107-21.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-GMP-Frozen-160107-22.dedup.

Reading in file: ../../input/sc-bams_nodup/BM1077-HSC-Frozen-160105-49.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-HSC-Frozen-160105-5.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-HSC-Frozen-160105-50.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-HSC-Frozen-160105-51.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-HSC-Frozen-160105-53.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-HSC-Frozen-160105-54.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-HSC-Frozen-160105-55.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-HSC-Frozen-160105-56.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-HSC-Frozen-160105-57.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-HSC-Frozen-160105-58.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-HSC-Frozen-160105-59.dedup.st.bam
Reading in file: ../../input/sc-bams_nodup/BM1077-HSC-Frozen-160105-6.dedup.s

In [None]:
# Print a summary of the fragment count object.
fragment_counts

In [None]:
# Annotate the counts with GC-content bias computed against the hg19 genome.
fragment_counts <- addGCBias(
  object = fragment_counts,
  genome = BSgenome.Hsapiens.UCSC.hg19
)

In [None]:
# Inspect the first rows of the per-peak annotations (presumably now
# including the GC bias added in the previous step -- verify in the output).
head(rowData(fragment_counts))

In [None]:
# Filter the peak set, requesting non-overlapping peaks only.
counts_filtered <- filterPeaks(
  object = fragment_counts,
  non_overlapping = TRUE
)

In [None]:
# Compute the background peak sets for the filtered counts ...
bg <- getBackgroundPeaks(object = counts_filtered)
# ... and write them to disk so they can be reloaded later.
saveRDS(object = bg, file = "background_peaks_kmers.rds")

In [None]:
# Match all 6-mers against the filtered peaks, using hg19 for the sequences.
kmer_ix <- matchKmers(
  k = 6,
  subject = counts_filtered,
  genome = BSgenome.Hsapiens.UCSC.hg19
)

In [None]:
# Compute per-cell deviations for each k-mer annotation, reusing the
# background peaks computed (and saved) earlier.
dev <- computeDeviations(
  object = counts_filtered,
  background_peaks = bg,
  annotations = kmer_ix
)

In [None]:
# Wall-clock timestamp taken once the deviations have been computed.
end_time <- Sys.time()

In [None]:
# Elapsed time between the two timestamps, with automatically chosen units.
difftime(end_time, start_time)

In [None]:
# Extract the deviation z-scores that are saved as the feature matrix.
# computeDeviations() stores two assays, in this order: "deviations"
# (bias-corrected deviations) and "z" (z-scores). The previous
# `dev@assays[[1]]` therefore picked the deviations rather than the z-scores
# this variable is named for, and reached into the S4 slot directly.
# deviationScores() is chromVAR's exported accessor for the "z" assay.
df_zscores <- deviationScores(dev)

In [None]:
# Persist the feature matrix for downstream use.
saveRDS(
  object = df_zscores,
  file = "../../output/feature_matrices/FM_ChromVAR_buenrostro2018bulkpeaks_kmers.rds"
)

### Downstream Analysis

In [None]:
# Compute variability from the deviations and draw it as a static
# (non-plotly) plot.
variability <- computeVariability(object = dev)
plotVariability(object = variability, use_plotly = FALSE)

In [None]:
# Preview the variability table and report its dimensions.
head(variability)
dim(variability)

In [None]:
# Replace the `celltype` sample annotation with the metadata rows looked up
# by the column (cell) names of `dev`.
# NOTE(review): this yields a plain vector only if `metadata` has a single
# column, and relies on colnames(dev) matching rownames(metadata) -- confirm.
colData(dev)[["celltype"]] <- metadata[colnames(dev), ]

In [None]:
# Run t-SNE on the deviations (variability threshold 1.5, perplexity 10).
tsne_results <- deviationsTsne(
  object = dev,
  perplexity = 10,
  threshold = 1.5
)

In [None]:
# Build static (non-shiny) t-SNE plots coloured by the `celltype` annotation,
# then display the first one.
tsne_plots <- plotDeviationsTsne(
  object = dev,
  tsne = tsne_results,
  sample_column = "celltype",
  shiny = FALSE
)
tsne_plots[[1]]

In [None]:
# Print R, platform and package version details for reproducibility.
sessionInfo()

In [None]:
# Save every object in the workspace so the session can be restored later.
save.image(file = "ChromVAR_buenrostro2018bulkpeaks_kmers.RData")