In [35]:
import backedarray as ba
import scipy.sparse
import numpy as np
import h5py
import zarr

# Create Dataset

In [36]:
# Fixed seed so that Restart & Run All rebuilds exactly the same matrix;
# without it every run of this cell generates different data.
RANDOM_SEED = 0

# 100 x 50 sparse matrix in CSR format with a density of 0.2.
csr_matrix = scipy.sparse.random(
    100, 50, format="csr", density=0.2, random_state=RANDOM_SEED
)
# Dense copy of the same values, written by the dense-array examples.
dense_array = csr_matrix.toarray()

## HDF5 Backend

In [37]:
# Store the sparse matrix (csc or csr format) in group "X" of an HDF5 file
# opened in "w" mode.
h5_csr_path = 'csr.h5'
with h5py.File(h5_csr_path, "w") as h5_out:
    sparse_group = h5_out.create_group("X")
    ba.write_sparse(sparse_group, csr_matrix)

In [38]:
# Store the 2-d numpy array under key "X" of an HDF5 file opened in "w" mode.
h5_dense_path = 'dense.h5'
with h5py.File(h5_dense_path, "w") as h5_out:
    h5_out["X"] = dense_array

## Zarr Backend

In [39]:
# Store the sparse matrix (csc or csr format) in group "X" of a zarr store
# opened in "w" mode.
zarr_csr_path = 'csr.zarr'
with zarr.open(zarr_csr_path, mode="w") as zarr_out:
    sparse_group = zarr_out.create_group("X")
    ba.write_sparse(sparse_group, csr_matrix)

In [40]:
 # Write 2-d numpy array to zarr format
zarr_dense_path = 'dense.zarr'
with zarr.open(zarr_dense_path, mode="w") as f:
    f["X"] = dense_array

# Read Dataset

## HDF5 Backend

In [41]:
# Open both HDF5 files read-only. The handles are kept in variables (not a
# `with` block) and are closed explicitly after the indexing examples.
h5_csr_file = h5py.File(h5_csr_path, "r")
h5_dense_file = h5py.File(h5_dense_path, "r")

# Wrap the "X" entry of each open file with backedarray.
h5_csr_disk = ba.open(h5_csr_file["X"])
h5_dense_disk = ba.open(h5_dense_file["X"])

## Zarr Backend

In [42]:
# Open each zarr store and wrap its "X" entry with backedarray
# (sparse store first, then the dense one).
zarr_csr_disk, zarr_dense_disk = (
    ba.open(zarr.open(store_path)["X"])
    for store_path in (zarr_csr_path, zarr_dense_path)
)

# Numpy Style Indexing

In [43]:
# Rows 1 and 2 of the zarr-backed sparse matrix, converted to a dense array for display.
zarr_csr_disk[1:3].toarray()

array([[0.72856365, 0.8610938 , 0.        , 0.        , 0.04454253,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.3990893 , 0.34160169,
        0.43083183, 0.        , 0.01659938, 0.        , 0.05029692,
        0.04005214, 0.        , 0.        , 0.94356757, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.73173317, 0.        , 0.37149114, 0.        ,
        0.32696101, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.56223851, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.05874407, 0.        , 0.        , 0.        ,
        0.        , 0.43282904, 0.3654535 , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.05829187, 0.        ,
        0.        , 0.        , 0.79824896, 0.04639874, 0.        ,
        0.        , 0.        , 0.        , 0. 

In [44]:
# Negative slice bounds work too: the last two rows of the zarr-backed dense array.
zarr_dense_disk[-2:]

array([[0.        , 0.        , 0.        , 0.47410356, 0.        ,
        0.        , 0.63645999, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.3305854 ,
        0.        , 0.        , 0.26453512, 0.        , 0.        ,
        0.        , 0.85138704, 0.13002323, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.1908435 , 0.63189417,
        0.94736818, 0.        , 0.        , 0.        , 0.36012683],
       [0.97760577, 0.        , 0.        , 0.78986745, 0.        ,
        0.        , 0.        , 0.        , 0.34662001, 0.        ,
        0.        , 0.        , 0.74670336, 0.57551662, 0.        ,
        0.        , 0.        , 0.17504075, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0. 

In [45]:
# Every row from index 2 onward of the HDF5-backed sparse matrix, shown as a dense array.
h5_csr_disk[2:].toarray()

array([[0.        , 0.05874407, 0.        , ..., 0.48558519, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.82192458, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.54574553, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.36012683],
       [0.97760577, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [46]:
# Ellipsis selects the whole HDF5-backed sparse matrix; toarray() makes it dense for display.
h5_csr_disk[...].toarray()

array([[0.        , 0.54525499, 0.        , ..., 0.76724287, 0.66323358,
        0.        ],
       [0.72856365, 0.8610938 , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.05874407, 0.        , ..., 0.48558519, 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.54574553, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.36012683],
       [0.97760577, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [47]:
# First two rows of the HDF5-backed dense array.
h5_dense_disk[:2]

array([[0.        , 0.54525499, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.69714785, 0.52435088, 0.        ,
        0.        , 0.        , 0.        , 0.06474248, 0.        ,
        0.00189399, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.92923897, 0.        , 0.        , 0.        ,
        0.10634988, 0.        , 0.        , 0.        , 0.        ,
        0.761273  , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.76724287, 0.66323358, 0.        ],
       [0.72856365, 0.8610938 , 0.        , 0.        , 0.04454253,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.3990893 , 0.34160169,
        0.43083183, 0.        , 0.01659938, 0.        , 0.05029692,
        0.04005214, 0.        , 0.        , 0.9

In [48]:
# Release the HDF5 handles that were opened for the read examples.
for open_file in (h5_csr_file, h5_dense_file):
    open_file.close()

# Append

In [49]:
# Expected result: the original matrix stacked on top of a second copy of itself.
expected_stacked = scipy.sparse.vstack((csr_matrix, csr_matrix)).toarray()

# Append the matrix to the zarr-backed sparse array, then read everything back.
# NOTE(review): presumably each run of this cell appends another copy, so the
# check below would only hold on the first run after the zarr store is
# rewritten -- confirm before re-running this cell in isolation.
zarr_csr_disk.append(csr_matrix)
appended_dense = zarr_csr_disk[...].toarray()

np.testing.assert_array_equal(appended_dense, expected_stacked)

# Read h5ad files created using [anndata](https://anndata.readthedocs.io/)

In [50]:
%%bash
# Download the example dataset only when no local copy is present.
[ -f "pbmc3k.h5ad" ] || wget https://raw.githubusercontent.com/chanzuckerberg/cellxgene/main/example-dataset/pbmc3k.h5ad

In [51]:
import anndata.experimental

# obs and var are read with anndata's experimental read_elem; X is opened
# with backedarray from the same file.
# NOTE(review): the file is closed when this `with` block exits, while the
# HDF5 cells earlier keep their files open until the indexing is done.
# Presumably X must be indexed while the file is still open -- confirm.
with h5py.File('pbmc3k.h5ad', 'r') as h5ad_file:
    obs, var = (
        anndata.experimental.read_elem(h5ad_file[key]) for key in ('obs', 'var')
    )
    X = ba.open(h5ad_file['X'])