In [3]:
import numpy as np
import pandas as pd
import anndata as ad
from scipy.sparse import csr_matrix
from mudata import MuData
import mudata as md
from anndata import AnnData
import pyranges as pr
import bioframe as bf
import bioframe.vis

In [4]:
counts = csr_matrix(np.random.poisson(1, size=(100, 1000)), dtype=np.float32)
exons, cpg = pr.data.exons(), pr.data.cpg()

adata = ad.AnnData(counts)
adata.obs_names = [f"Cell_{i:d}" for i in range(adata.n_obs)]
adata.var_names = [f"Gene_{i:d}" for i in range(adata.n_vars)]

In [5]:
adata.varm['coord'] = exons.df.sample(frac=1, replace=False).set_index(adata.var_names)

In [6]:
coord = pr.PyRanges(adata.varm['coord'].reset_index())

In [7]:
pyranges_df = adata.varm['coord'].reset_index()

In [20]:
bed_column_names = ("Chromosome", "Start", "End")
query_result = bf.select(exons[["Chromosome", "Start", "End"]], "chrX:0-1000000", cols=bed_column_names)
query_result.head()

TypeError: Invalid bedFrame: Invalid column names

In [19]:
exons[["Chromosome", "Start", "End"]]

Unnamed: 0,Chromosome,Start,End,Strand
0,chrX,135721701,135721963,+
1,chrX,135574120,135574598,+
2,chrX,47868945,47869126,+
3,chrX,77294333,77294480,+
4,chrX,91090459,91091043,+
...,...,...,...,...
995,chrY,15591133,15591197,-
996,chrY,15409586,15409728,-
997,chrY,15478146,15478273,-
998,chrY,15360258,15361762,-


In [110]:
pr.PyRanges(pyranges_df)

Unnamed: 0,index,Chromosome,Start,End,Name,Score,Strand
0,Gene_0,chrX,51453924,51455226,NR_033773_exon_0_0_chrX_51453925_f,0,+
1,Gene_2,chrX,105880989,105881024,NM_001184782_exon_7_0_chrX_105880990_f,0,+
2,Gene_3,chrX,115585489,115585608,NM_007231_exon_9_0_chrX_115585490_f,0,+
3,Gene_5,chrX,110463585,110464173,NM_001128173_exon_14_0_chrX_110463586_f,0,+
4,Gene_7,chrX,49315926,49315999,NM_001127345_exon_0_0_chrX_49315927_f,0,+
...,...,...,...,...,...,...,...
995,Gene_973,chrY,15409586,15409728,NR_047626_exon_3_0_chrY_15409587_r,0,-
996,Gene_977,chrY,15522872,15522993,NM_001258270_exon_22_0_chrY_15522873_r,0,-
997,Gene_984,chrY,15526614,15526673,NR_047609_exon_23_0_chrY_15526615_r,0,-
998,Gene_986,chrY,15417278,15417427,NR_047625_exon_6_0_chrY_15417279_r,0,-


In [105]:
pr.PyRanges(chromosomes='chrX', starts=[100000000], ends=[900000000])

Unnamed: 0,Chromosome,Start,End
0,chrX,100000000,900000000


In [9]:
idx = coord.intersect(pr.PyRanges(chromosomes='chrX', starts=[100000000], ends=[130000000])).index

In [10]:
adata[:, idx]

View of AnnData object with n_obs × n_vars = 100 × 173
    varm: 'coord'

In [14]:
def slice_pyrange(adata, chrom, start, end):
    prange = pr.PyRanges(chromosomes=chrom, starts=[start], ends=[end])
    return subset_by_overlap(adata, prange)

In [98]:
slice_pyrange(adata, 'chrX', 100000000, 120000000)

View of AnnData object with n_obs × n_vars = 100 × 122
    varm: 'coord'

In [57]:
def subset_by_overlap(adata, prange):
    coord = pr.PyRanges(adata.varm['coord'].reset_index())
    idx = coord.overlap(prange).index
    return adata[:, idx]

In [104]:
exons['chrX']['+']

Unnamed: 0,Chromosome,Start,End,Name,Score,Strand
0,chrX,135721701,135721963,NR_038462_exon_0_0_chrX_135721702_f,0,+
1,chrX,135574120,135574598,NM_001727_exon_2_0_chrX_135574121_f,0,+
2,chrX,47868945,47869126,NM_205856_exon_4_0_chrX_47868946_f,0,+
3,chrX,77294333,77294480,NM_000052_exon_17_0_chrX_77294334_f,0,+
4,chrX,91090459,91091043,NM_001168360_exon_0_0_chrX_91090460_f,0,+
...,...,...,...,...,...,...
428,chrX,117749562,117749674,NM_144658_exon_29_0_chrX_117749563_f,0,+
429,chrX,129484619,129484705,NM_001282196_exon_5_0_chrX_129484620_f,0,+
430,chrX,70607110,70607311,NR_104391_exon_14_0_chrX_70607111_f,0,+
431,chrX,13587693,13588054,NM_001167890_exon_0_0_chrX_13587694_f,0,+


In [111]:
slice_pyrange(adata, 'chrX', 10000000, 100000000)

View of AnnData object with n_obs × n_vars = 100 × 448
    varm: 'coord'

In [112]:
n, d, k = 1000, 100, 10

z = np.random.normal(loc=np.arange(k), scale=np.arange(k)*2, size=(n,k))
w = np.random.normal(size=(d,k))
y = np.dot(z, w.T)
y.shape

adata = AnnData(y)
adata.obs_names = [f"obs_{i+1}" for i in range(n)]
adata.var_names = [f"var_{j+1}" for j in range(d)]

d2 = 50
w2 = np.random.normal(size=(d2,k))
y2 = np.dot(z, w2.T)

adata2 = AnnData(y2)
adata2.obs_names = [f"obs_{i+1}" for i in range(n)]
adata2.var_names = [f"var2_{j+1}" for j in range(d2)]


mdata = MuData({"A": adata, "B": adata2})

  adata = AnnData(y)
  adata2 = AnnData(y2)


In [113]:
mdata