* Convert anndata to summarized experiment?
* Extract gene activity matrix from ArchR
* convert to scJoint input: https://github.com/sydneybiox/scJoint/blob/main/tutorial/sce_to_h5.ipynb

In [19]:
library(ArchR)
library(parallel)
library(zellkonverter)
library(SingleCellExperiment)
addArchRThreads(threads = 16) 
addArchRGenome('hg19')

Setting default number of Parallel threads to 16.

Setting default genome to Hg19.



# Read ATAC data 

In [2]:
data_path <- "/storage/groups/ml01/workspace/laura.martens/moretti_colab/archr_final/"

In [3]:
atac <- loadArchRProject(paste0(data_path, "Epicardiods"))

Successfully loaded ArchRProject!


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _ 

In [5]:
#get gene activity from archr project
gene_scores <- getMatrixFromProject(atac, useMatrix='GeneScoreMatrix')

ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-4fb76dcea830-Date-2022-06-16_Time-18-30-07.log
If there is an issue, please report to github with logFile!

2022-06-16 18:33:36 : Organizing colData, 3.484 mins elapsed.

2022-06-16 18:33:37 : Organizing rowData, 3.49 mins elapsed.

2022-06-16 18:33:37 : Organizing rowRanges, 3.493 mins elapsed.

2022-06-16 18:33:37 : Organizing Assays (1 of 1), 3.494 mins elapsed.

2022-06-16 18:34:48 : Constructing SummarizedExperiment, 4.676 mins elapsed.

2022-06-16 18:34:50 : Finished Matrix Creation, 4.705 mins elapsed.



In [11]:
gene_scores <- as(gene_scores, "SingleCellExperiment")

In [107]:
counts(gene_scores) <- assays(gene_scores)$GeneScoreMatrix

In [17]:
rownames(gene_scores) <- rowData(gene_scores)$name

In [109]:
#create logcounts
libsizes <- colSums(counts(gene_scores))
size.factors <- libsizes/mean(libsizes)

In [110]:
logcounts(gene_scores) <- log2(t(t(counts(gene_scores))/size.factors) + 1)

In [111]:
gene_scores

class: SingleCellExperiment 
dim: 23127 39738 
metadata(0):
assays(3): GeneScoreMatrix counts logcounts
rownames(23127): DDX11L1 WASH7P ... CLIC2 TMLHE
rowData names(6): seqnames start ... name idx
colnames(39738): MUC26649_1234#ACTCGGGAGTTCCCGG-1
  MUC26649_1234#GATCGTAGTGATCAGG-1 ... MUC26653_1234#ATTACTCAGTCGAAAT-1
  MUC26653_1234#TAAACCGGTAGAAAGG-1
colData names(16): BlacklistRatio DoubletEnrichment ... TSSEnrichment
  projColData[, colnames(projColData) %ni% colnames(colData)]
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):

In [112]:
8

# Load RNA

In [39]:
library(data.table)

In [40]:
data_path <- "/storage/groups/ml01/workspace/laura.martens/moretti_colab/transfer_data/"

In [36]:
#Convert(paste0(data_path, "Day2_15_RNAseq.h5ad"), dest = "h5seurat", overwrite = TRUE)

In [41]:
counts <- fread(paste0(data_path, "counts.csv"))

"Detected 24354 column names but the data has 24355 columns (i.e. invalid file). Added 1 extra default column name for the first column which is guessed to be row names or an index. Use setnames() afterwards if this guess is not correct, or fix the file write command that created the file to create a valid file."


In [49]:
mat <- as.matrix(counts[,3:ncol(counts)])

In [53]:
rownames(mat) <- counts$V1

In [71]:
mat <- t(mat)

In [82]:
coldata <- data.frame(clusters=as.factor(counts$clusters), row.names=colnames(mat))

In [83]:
rna <- SingleCellExperiment(assays = list(counts = mat), colData=coldata)

In [84]:
rna

class: SingleCellExperiment 
dim: 24353 35499 
metadata(0):
assays(1): counts
rownames(24353): RP11-34P13.7 AL627309.1 ... RP1-5O6.5 AP000695.1
rowData names(0):
colnames(35499): AAACCCAAGAGCAAGA-1_5 AAACCCAAGCCGCACT-1_7 ...
  TTTGTTGTCTCTCTAA-1_6 TTTGTTGTCTTAGGAC-1_1
colData names(1): clusters
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):

In [None]:
#create logcounts
libsizes <- colSums(mat)
size.factors <- libsizes/mean(libsizes)

logcounts(rna) <- log2(t(t(mat)/size.factors) + 1)

In [85]:
table(droplevels(rna$clusters))


   0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
3460 2896 2644 2520 2208 2020 1930 1889 1486 1346 1130 1010  977  922  903  855 
  16   17   18   19   20   21   22   23   24   25   26   27   28   29   30 
 826  731  689  671  606  560  546  540  477  393  362  310  286  176  130 

# Save h5 data

In [113]:
# Only keep common genes between two dataset
common_genes <- intersect(rownames(rna),
                          rownames(gene_scores))
length(common_genes)

In [137]:
# Extract the logcounts data from sce object
exprs_atac <- logcounts(gene_scores)[rownames(gene_scores) %in% common_genes]

In [138]:
exprs_atac

In [124]:
exprs_rna <- logcounts(rna)[rownames(rna) %in% common_genes]

In [126]:
source("/storage/groups/ml01/code/laura.martens/github_repos/scJoint/data_to_h5.R")

"package 'HDF5Array' was built under R version 4.1.3"
Loading required package: DelayedArray


Attaching package: 'DelayedArray'


The following objects are masked from 'package:base':

    aperm, apply, rowsum, scale, sweep



Attaching package: 'HDF5Array'


The following object is masked from 'package:rhdf5':

    h5ls




In [128]:
write_h5_scJoint(exprs_list = list(rna = exprs_rna,
                                   atac = exprs_atac), 
                 h5file_list = c("/storage/groups/ml01/workspace/laura.martens/moretti_colab/scjoint/exprs_10xPBMC_rna.h5", 
                                 "/storage/groups/ml01/workspace/laura.martens/moretti_colab/scjoint/exprs_10xPBMC_atac.h5"))

In [140]:
write_h5_scJoint(exprs_list = list(atac = exprs_atac), 
                 h5file_list = c("/storage/groups/ml01/workspace/laura.martens/moretti_colab/scjoint/exprs_10xPBMC_atac.h5"))

ERROR: Error in UseMethod("h5writeDataset"): no applicable method for 'h5writeDataset' applied to an object of class "NULL"


In [134]:
write_csv_scJoint(cellType_list =  list(rna = rna$clusters),
                  csv_list = c("/storage/groups/ml01/workspace/laura.martens/moretti_colab/scjoint/cellType_10xPBMC_rna.csv"))