In [19]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
import anndata as ad
from gpu_spatial_graph_pipeline.data.utils import transforms
from gpu_spatial_graph_pipeline.data.anndata2data import AnnData2DataByCategory


## All NCEM Datasets

### Creating A2D

In [21]:
# Column of `adata.obs` whose distinct values define the groups the converter
# iterates over.
category_to_iterate = 'point'

# Attribute name on each resulting Data object -> AnnData locations that are
# gathered into that attribute.
fields = dict(
    features=[
        'obs/Cluster_preprocessed',
        'obs/donor',
        'obsm/design_matrix',
    ],
    labels=['X'],
)

# Preprocessing callables handed to the converter. Each one takes two
# positional arguments; the second is ignored here.
preprocess = [
    lambda data, _: transforms.add_design_matrix(
        data,
        'obs/Cluster_preprocessed',
        'obs/donor',
        'design_matrix',
    ),
]

# Build the AnnData -> Data converter; edge indices are not requested.
a2d = AnnData2DataByCategory(
    fields=fields,
    category=category_to_iterate,
    preprocess=preprocess,
    yields_edge_index=False,
)

### Load Unprocessed Dataset

In [22]:
from gpu_spatial_graph_pipeline.data.datasets import DatasetHartmann


# MIBI-TOF data (Hartmann dataset), read from the local example-data folder.
hartmann_data_path = './example_data/hartmann/'
dataset = DatasetHartmann(data_path=hartmann_data_path)

# Collect the values of `img_celldata` into a list
# (presumably one AnnData per image -- they are concatenated with `ad.concat` later).
adatas = [*dataset.img_celldata.values()]



Loading data from raw files
registering celldata




collecting image-wise celldata
adding graph-level covariates
Loaded 58 images with complete data from 4 patients over 63747 cells with 36 cell features and 8 distinct celltypes.


### Some Preprocessing Done Manually

These preprocessing steps can also be performed inside the `a2d` callable by passing them as functions in the `preprocess` list.

In [23]:

# Combine the list of AnnData objects into a single AnnData.
adata = ad.concat(adatas)

# Cast these obs columns to the pandas 'category' dtype: 'point' is the column
# the converter groups by, the other two are used as features.
to_categorize = ['donor', 'Cluster_preprocessed', 'point']
for column_name in to_categorize:
    categorical_column = adata.obs[column_name].astype('category')
    adata.obs[column_name] = categorical_column

### Convert AnnData to Data on call

In [24]:
# Run the converter on the merged AnnData; the result is a list of Data
# objects (presumably one per value of the 'point' category -- confirm).
datas = a2d(adata)
# Bare trailing expression so the notebook displays the resulting list.
datas

[Data(features=[1338, 88], labels=[1338, 36]),
 Data(features=[61, 88], labels=[61, 36]),
 Data(features=[1316, 88], labels=[1316, 36]),
 Data(features=[1540, 88], labels=[1540, 36]),
 Data(features=[1822, 88], labels=[1822, 36]),
 Data(features=[863, 88], labels=[863, 36]),
 Data(features=[564, 88], labels=[564, 36]),
 Data(features=[1023, 88], labels=[1023, 36]),
 Data(features=[324, 88], labels=[324, 36]),
 Data(features=[287, 88], labels=[287, 36]),
 Data(features=[636, 88], labels=[636, 36]),
 Data(features=[311, 88], labels=[311, 36]),
 Data(features=[890, 88], labels=[890, 36]),
 Data(features=[1235, 88], labels=[1235, 36]),
 Data(features=[1020, 88], labels=[1020, 36]),
 Data(features=[1241, 88], labels=[1241, 36]),
 Data(features=[1438, 88], labels=[1438, 36]),
 Data(features=[1021, 88], labels=[1021, 36]),
 Data(features=[1632, 88], labels=[1632, 36]),
 Data(features=[780, 88], labels=[780, 36]),
 Data(features=[524, 88], labels=[524, 36]),
 Data(features=[669, 88], labels=[6

## Squidpy Datasets

In [25]:
import squidpy as sq

# Load squidpy's `mibitof` example dataset.
# NOTE: this rebinds `adata`, replacing the merged AnnData built earlier in the notebook.
adata = sq.datasets.mibitof()


In [26]:
# Column of `adata.obs` whose distinct values define the groups the converter
# iterates over.
category_to_iterate = 'library_id'

# Attribute name on each resulting Data object -> AnnData locations that are
# gathered into that attribute.
# NOTE(review): unlike the earlier `fields` definition, 'obsm/design_matrix' is
# not listed here even though the preprocess step below is still given the
# 'design_matrix' name -- confirm whether that omission is intentional.
fields = dict(
    features=['obs/Cluster', 'obs/donor'],
    labels=['X'],
)

# Preprocessing callables handed to the converter. Each one takes two
# positional arguments; the second is ignored here.
preprocess = [
    lambda data, _: transforms.add_design_matrix(
        data,
        'obs/Cluster',
        'obs/donor',
        'design_matrix',
    ),
]

# Same converter class as before, this time also requesting edge indices.
a2c = AnnData2DataByCategory(
    fields=fields,
    category=category_to_iterate,
    preprocess=preprocess,
    yields_edge_index=True,
)



In [27]:
# Convert the squidpy AnnData and display the result; since the converter was
# built with yields_edge_index=True, each Data object also has an `edge_index`.
a2c(adata)

[Data(edge_index=[2, 6138], features=[1023, 10], labels=[1023, 36]),
 Data(edge_index=[2, 7446], features=[1241, 10], labels=[1241, 36]),
 Data(edge_index=[2, 6270], features=[1045, 10], labels=[1045, 36])]