In [161]:
# Enable autoreload so that edits to imported modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [162]:
# Uncomment to install scplode into this kernel's environment:
# %pip install scplode

In [163]:
# Create a small synthetic AnnData file used by the examples in this notebook
import numpy as np
import pandas as pd
import anndata as ad
n_obs = 50
n_vars = 50

# Seeded generator so the example data is identical on every
# "Restart Kernel -> Run All" (the original draws were unseeded).
rng = np.random.default_rng(0)

# Random expression matrix: float32 values drawn uniformly from [0, 1)
X = rng.random((n_obs, n_vars), dtype=np.float32)

# Create obs (one row per cell) and var (one row per gene) dataframes
obs = pd.DataFrame(
    index=[f"cell_{i}" for i in range(n_obs)],
    data={"cell_type": rng.choice(["A", "B"], size=n_obs)}
)
var = pd.DataFrame(
    index=[f"gene_{i}" for i in range(n_vars)],
    data={"gene_name": [f"gene_{i}" for i in range(n_vars)]}
)

# Create AnnData
adata = ad.AnnData(X=X, obs=obs, var=var)

# Write to file: the following cells load the data from this path
adata.write('test.h5')

In [164]:
import scplode as sp
# Be warned: scplode will "scplode" (explode) the input and create new files on disk.
# Make sure you have adequate disk space.
# By default the new index files are created in your directory
# (presumably the current working directory -- TODO confirm).
adata_path = 'test.h5'
spadata = sp.read_h5ad(adata_path)
spadata

[INFO] Creating index
[INFO] Creating index: reading adata file
[INFO] Creating index: writing mmap dat file


  0%|          | 0/1 [00:00<?, ?it/s]

[INFO] Creating index: packing obs
[INFO] Creating index: packing var
[INFO] Loading index: obs
[INFO] Loading index: var
[INFO] Loading index: dat (implicitly)


<scplode.scplode.Scplode at 0x120e288d0>

In [165]:
# Examine obs: first rows of the per-cell table (var is shown in the next cell)
spadata.obs.head()

Unnamed: 0,cell_type
cell_0,B
cell_1,B
cell_2,B
cell_3,A
cell_4,A


In [166]:
# Examine var: first rows of the per-gene table
spadata.var.head()

Unnamed: 0,gene_name
gene_0,gene_0
gene_1,gene_1
gene_2,gene_2
gene_3,gene_3
gene_4,gene_4


In [167]:
# Example 1: index by rows -- select cells 10..19 and keep every gene
spadata[10:20]

View of AnnData object with n_obs × n_vars = 10 × 50
    obs: 'cell_type'
    var: 'gene_name'

In [168]:
# Make sure the row-sliced data matches what anndata itself returns

# Load a comparison object using anndata
import anndata as ad
adata = ad.read_h5ad(adata_path)
# assert_array_equal checks the shapes as well (a bare `==` would broadcast)
# and reports the mismatching elements when it fails.
np.testing.assert_array_equal(spadata[10:20].X, adata[10:20].X)
assert spadata[10:20].obs.equals(adata[10:20].obs), \
    "obs of rows 10:20 differs between scplode and anndata"

In [169]:
# Example 2: index by rows and columns -- cells 10..19 restricted to genes 10..19
spadata[10:20, 10:20]

View of AnnData object with n_obs × n_vars = 10 × 10
    obs: 'cell_type'
    var: 'gene_name'

In [170]:
# Make sure the row+column slice matches what anndata itself returns.
# assert_array_equal checks the shapes as well (a bare `==` would broadcast)
# and reports the mismatching elements when it fails.
np.testing.assert_array_equal(spadata[10:20, 10:20].X, adata[10:20, 10:20].X)
assert spadata[10:20, 10:20].var.equals(adata[10:20, 10:20].var), \
    "var of columns 10:20 differs between scplode and anndata"

In [171]:
# Example 3: index by specific barcodes (obs index labels, here cell_10 .. cell_19)
barcodes = spadata.obs.index[10:20]
spadata[barcodes]

View of AnnData object with n_obs × n_vars = 10 × 50
    obs: 'cell_type'
    var: 'gene_name'

In [172]:
# Make sure the barcode selection matches what anndata itself returns.
# assert_array_equal checks the shapes as well (a bare `==` would broadcast)
# and reports the mismatching elements when it fails.
np.testing.assert_array_equal(spadata[barcodes].X, adata[barcodes].X)

In [173]:
# Cleanup: drop the scplode index files, then delete the example data file
from pathlib import Path

spadata.delete_index()
Path(adata_path).unlink()

[INFO] Deleted: test.obs
[INFO] Deleted: test.var
[INFO] Deleted: test.dat
[INFO] Deleted: test.mtime
