In [None]:
import tiledbsoma

In [None]:
from tiledbsoma.io.ingest import from_h5ad

In [None]:
# Ingest the example pbmc3k H5AD file into a SOMA experiment at the relative
# URI "test", storing it under the measurement name "test_exp_name".
# NOTE(review): this relies on `from_h5ad` returning an object that has
# `.close()` — confirm against the installed tiledbsoma version.
e = from_h5ad(
    experiment_uri="test",
    input_path="/opt/cellxgene/example-dataset/pbmc3k.h5ad",
    measurement_name="test_exp_name",
)
# Close the returned handle right away; the experiment is re-opened for reading below.
e.close()

## Experiment

An `Experiment` is a class that represents a single-cell experiment. It always contains two objects:
1. `obs`: A `DataFrame` with primary annotations on the observation axis.
2. `ms`: A `Collection` of measurements.

In [83]:
# Open the SOMA experiment stored at this path; the displayed repr reports it
# as open for 'r' with two members, `ms` and `obs`.
# NOTE(review): the ingest cell writes to the relative URI "test", so this
# absolute path presumably assumes the notebook runs from /opt/TileDB-SOMA — confirm.
experiment = tiledbsoma.open("/opt/TileDB-SOMA/test/")
experiment

<Experiment '/opt/TileDB-SOMA/test/' (open for 'r') (2 items)
    'ms': 'file:///opt/TileDB-SOMA/test/ms' (unopened)
    'obs': 'file:///opt/TileDB-SOMA/test/obs' (unopened)>

Each of these objects can be opened by accessing it as an attribute of the experiment:

In [85]:
# Access the `ms` member (the collection of measurements); the bare expression displays its repr.
experiment.ms

<Collection 'file:///opt/TileDB-SOMA/test/ms' (open for 'r') (2 items)
    'raw': 'file:///opt/TileDB-SOMA/test/ms/raw' (unopened)
    'test_exp_name': 'file:///opt/TileDB-SOMA/test/ms/test_exp_name' (unopened)>

In [86]:
# Access the `obs` member (the observation-axis DataFrame); the bare expression displays its repr.
experiment.obs

<DataFrame 'file:///opt/TileDB-SOMA/test/obs' (open for 'r')>

Note that by default an `Experiment` is opened lazily, i.e. only the objects that are actually requested are opened.

Also, opening an object doesn't mean that it is fetched entirely into memory. It only returns a pointer to the object on disk.

## DataFrame

A `DataFrame` is a multi-column table with a user-defined schema. The schema is expressed as an Arrow Schema, and defines the column names and value types.

As an example, let's take a look at `obs`, which is represented as a DataFrame.

We can inspect the schema using `.schema`:

In [100]:
# Keep a handle to the `obs` DataFrame, then display its Arrow schema
# (column names and value types).
obs = experiment.obs
obs.schema

soma_joinid: int64
obs_id: large_string
n_genes: int64
percent_mito: float
n_counts: float
louvain: large_string

Note that `soma_joinid` is a field that exists in each `DataFrame` and acts as a join key for other objects, such as `SparseNDArray` (more on this later).

## Collection

## Measurement

## DenseNDArray

## SparseNDArray

In [None]:
# DataFrame: `obs` is the experiment's observation-axis DataFrame.
# The bare `obs` on the last line displays its repr.
obs = experiment.obs
obs

In [None]:
# Collection: `ms` is the experiment's collection of measurements, keyed by name.
# The bare `ms` on the last line displays its repr.
ms = experiment.ms
ms

In [94]:
# Measurement: look up a single measurement in the `ms` collection by name.
# "test_exp_name" is the measurement name that was passed to `from_h5ad` at ingest time.
meas = ms["test_exp_name"]
meas

<Measurement 'file:///opt/TileDB-SOMA/test/ms/test_exp_name' (open for 'r') (5 items)
    'obsm': 'file:///opt/TileDB-SOMA/test/ms/test_exp_name/obsm' (unopened)
    'varm': 'file:///opt/TileDB-SOMA/test/ms/test_exp_name/varm' (unopened)
    'X': Collection 'file:///opt/TileDB-SOMA/test/ms/test_exp_name/X' (open for 'r') (1 item)
        'data': DenseNDArray 'file:///opt/TileDB-SOMA/test/ms/test_exp_name/X/data' (open for 'r')
    'obsp': 'file:///opt/TileDB-SOMA/test/ms/test_exp_name/obsp' (unopened)
    'var': DataFrame 'file:///opt/TileDB-SOMA/test/ms/test_exp_name/var' (open for 'r')>

In [None]:
# DenseNDArray: the "data" member of the measurement's `X` collection
# (the Measurement repr above lists it as a DenseNDArray).
meas.X["data"]

In [None]:
# For SparseNDArray, convert the original h5ad to use a sparse matrix for X and re-open it

import anndata as ad