In [1]:
import numpy as np
import pandas as pd
import anndata as ad
from scipy.sparse import csr_matrix
from mudata import MuData
import mudata as md
from anndata import AnnData
import pyranges as pr

In [2]:
def set_coord(adata, prange):
    """Store the intervals of ``prange`` as per-variable coordinates on ``adata``.

    The interval table ``prange.df`` is re-indexed by ``adata.var_names`` and
    written to ``adata.varm['coord']``, so row *i* of the table becomes the
    coordinate record of variable *i*.

    Parameters
    ----------
    adata
        Object exposing ``var_names`` and a ``varm`` mapping; modified in place.
    prange
        Object exposing its intervals as a DataFrame through ``.df``.

    Returns
    -------
    None
    """
    interval_table = prange.df
    coord_by_var = interval_table.set_index(adata.var_names)
    adata.varm['coord'] = coord_by_var

In [4]:
def subset_by_overlap(adata, prange):
    """Return the variables of ``adata`` whose coordinates overlap ``prange``.

    The coordinate table in ``adata.varm['coord']`` is converted to a
    PyRanges with the variable names carried along as an ``index`` column,
    overlapped with ``prange``, and the surviving variable names are used to
    column-slice ``adata``.

    Parameters
    ----------
    adata
        Object with a ``varm['coord']`` DataFrame indexed by variable name and
        supporting ``adata[:, names]`` slicing.
    prange
        Query intervals, passed as-is to ``PyRanges.overlap``.

    Returns
    -------
    The result of ``adata[:, names]`` for the overlapping variables.
    """
    coord_table = adata.varm['coord']
    # Name the index level 'index' explicitly: reset_index() names the new
    # column after the index, so a named var_names index (e.g. 'gene_id')
    # would otherwise leave no 'index' column for the lookup below.
    # rename_axis returns a new frame, so the stored table is not modified.
    coord = pr.PyRanges(coord_table.rename_axis('index').reset_index())
    # NOTE(review): `.index` is expected to resolve to the 'index' column via
    # PyRanges column-attribute access, not a pandas index — confirm against
    # the installed pyranges version.
    idx = coord.overlap(prange).index
    return adata[:, idx]

In [3]:
def slice_pyrange(adata, chrom, start, end):
    """Restrict ``adata`` to the variables overlapping one genomic window.

    A single-interval PyRanges is built from ``chrom``, ``start`` and ``end``
    and handed, together with ``adata``, to ``subset_by_overlap``; that
    call's result is returned unchanged.
    """
    window = pr.PyRanges(
        chromosomes=chrom,
        starts=[start],
        ends=[end],
    )
    selected = subset_by_overlap(adata, window)
    return selected

In [10]:
class RangeAnnData(AnnData):
    """AnnData subclass that keeps per-variable genomic coordinates in ``varm['coord']``."""

    def set_coord(self, prange):
        """Store the intervals of ``prange`` as this object's variable coordinates.

        ``prange.df`` is re-indexed by ``self.var_names`` and written to
        ``self.varm['coord']``; row *i* of the interval table becomes the
        coordinate record of variable *i*. Returns ``None``.
        """
        # Index by this object's own var_names rather than a notebook-level
        # `adata` variable, so the method works on any instance.
        self.varm['coord'] = prange.df.set_index(self.var_names)

    def subset_by_overlap(self, prange):
        """Return ``self[:, names]`` for the variables whose coordinates overlap ``prange``."""
        # Name the index level 'index' explicitly: reset_index() names the new
        # column after the index, so a named var_names index would otherwise
        # leave no 'index' column for the lookup below.
        coord = pr.PyRanges(self.varm['coord'].rename_axis('index').reset_index())
        # NOTE(review): `.index` is expected to resolve to the 'index' column
        # via PyRanges column-attribute access — confirm against the
        # installed pyranges version.
        idx = coord.overlap(prange).index
        return self[:, idx]

    def slice_pyrange(self, chrom, start, end):
        """Return the variables overlapping the single window ``chrom:start-end``."""
        prange = pr.PyRanges(chromosomes=chrom, starts=[start], ends=[end])
        # Dispatch through the method so the class does not depend on a
        # module-level function of the same name being defined first.
        return self.subset_by_overlap(prange)

In [16]:
# Fixed seed so Restart & Run All reproduces the same synthetic counts.
SEED = 0
N_CELLS = 100
rng = np.random.default_rng(SEED)

# Example interval sets loaded from pyranges: the exons become the
# per-variable coordinates, the CpG set is used as an overlap query below.
exons, gr = pr.data.exons(), pr.data.cpg()

# One variable per exon interval, so set_coord() can pair them row-for-row
# instead of relying on a hard-coded width matching the exon table.
counts = csr_matrix(rng.poisson(1, size=(N_CELLS, len(exons))), dtype=np.float32)

adata = RangeAnnData(counts)
adata.obs_names = [f"Cell_{i:d}" for i in range(adata.n_obs)]
adata.var_names = [f"Gene_{i:d}" for i in range(adata.n_vars)]

In [17]:
# Attach the exon intervals as per-variable coordinates (stored in adata.varm['coord']).
adata.set_coord(exons)

In [19]:
# Keep only the variables whose coordinates overlap chrX:1,000,000-10,000,000,
# then display the count matrix of the result.
slice_adata = adata.slice_pyrange('chrX', 1000000, 10000000)
slice_adata.X

<100x32 sparse matrix of type '<class 'numpy.float32'>'
	with 2011 stored elements in Compressed Sparse Row format>

In [20]:
# Keep only the variables whose coordinates overlap an interval in `gr`
# (loaded from pr.data.cpg() above), then display the resulting count matrix.
subset_adata = adata.subset_by_overlap(gr)
subset_adata.X

<100x78 sparse matrix of type '<class 'numpy.float32'>'
	with 4899 stored elements in Compressed Sparse Row format>

In [None]:
class RangeMuData(MuData):
    """MuData subclass that keeps per-variable genomic coordinates in ``varm['coord']``."""

    def set_coord(self, prange):
        """Store the intervals of ``prange`` as this object's variable coordinates.

        ``prange.df`` is re-indexed by ``self.var_names`` and written to
        ``self.varm['coord']``; row *i* of the interval table becomes the
        coordinate record of variable *i*. Returns ``None``.
        """
        # Index by this object's own var_names rather than a notebook-level
        # `adata` variable, which is an unrelated object.
        self.varm['coord'] = prange.df.set_index(self.var_names)

    def subset_by_overlap(self, prange):
        """Return ``self[:, names]`` for the variables whose coordinates overlap ``prange``."""
        # Name the index level 'index' explicitly: reset_index() names the new
        # column after the index, so a named var_names index would otherwise
        # leave no 'index' column for the lookup below.
        coord = pr.PyRanges(self.varm['coord'].rename_axis('index').reset_index())
        # NOTE(review): `.index` is expected to resolve to the 'index' column
        # via PyRanges column-attribute access — confirm against the
        # installed pyranges version.
        idx = coord.overlap(prange).index
        return self[:, idx]

    def slice_pyrange(self, chrom, start, end):
        """Return the variables overlapping the single window ``chrom:start-end``."""
        prange = pr.PyRanges(chromosomes=chrom, starts=[start], ends=[end])
        # Dispatch through the method so the class does not depend on a
        # module-level function of the same name being defined first.
        return self.subset_by_overlap(prange)