# Creating Iterables From AnnData

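By default, `AnnData2Data` expects an iterable of AnnData objects when it is called. If you want the `AnnData2Data` object you created to take a single AnnData instead, you can provide a callable that converts that AnnData into an iterable, such as the `iterables.ToCategoryIterable` demonstrated below.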

In [3]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules
# before each cell runs, so edits to local packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
from typing import Callable, Optional

from anndata import AnnData

from geome import iterables


from geome.adata2data import AnnData2DataDefault
import pytorch_lightning as pl
import torch
import anndata as ad
from geome import transforms
from geome.adata2data import AnnData2DataByCategory
from utils.datasets import DatasetHartmann
from utils.models.non_linear_ncem import NonLinearNCEM
from geome.datamodule import GraphAnnDataModule




	geopandas.options.use_pygeos = True

If you intended to use PyGEOS, set the option to False.
  _check_geopandas_using_shapely()


In [8]:
# Name of the `.obs` column used to split the AnnData. It is defined once
# and reused below so the column name cannot drift out of sync.
category = 'point'
# Callable applied to an AnnData further below to produce the split
# AnnData objects.
to_iterable = iterables.ToCategoryIterable(category, axis='obs')

In [10]:
# MIBI-TOF example data (Hartmann dataset).
dataset = DatasetHartmann(data_path='./example_data/hartmann/')
adatas = [*dataset.img_celldata.values()]

# Concatenate the image-wise AnnData objects into a single AnnData, then
# convert the listed string columns of `.obs` to categories, as they should be.
adata = transforms.Categorize(
    ['donor', 'Cluster_preprocessed', 'point'],
    axis='obs',
)(ad.concat(adatas))

Loading data from raw files
registering celldata




collecting image-wise celldata
adding graph-level covariates
Loaded 58 images with complete data from 4 patients over 63747 cells with 36 cell features and 8 distinct celltypes.


In [11]:
# Show the categories of the 'Cluster' column on the merged AnnData.
# NOTE(review): 'Cluster' is not in the list passed to `Categorize` above;
# the `.cat` accessor only works because the column is already categorical.
adata.obs['Cluster'].cat.categories

Index(['Endothelial', 'Epithelial', 'Fibroblast', 'Imm_other', 'Myeloid_CD11c',
       'Myeloid_CD68', 'Tcell_CD4', 'Tcell_CD8'],
      dtype='object')

In [12]:
# Exhaust the iterable produced from the merged AnnData and keep the pieces
# in a list (presumably one AnnData per 'point' category; verify against
# `ToCategoryIterable`). The bare name on the last line displays the list.
split_adatas = [*to_iterable(adata)]
split_adatas

[AnnData object with n_obs × n_vars = 1338 × 36
     obs: 'point', 'cell_id', 'cell_size', 'donor', 'Cluster', 'Cluster_preprocessed'
     obsm: 'spatial', 'node_types',
 AnnData object with n_obs × n_vars = 311 × 36
     obs: 'point', 'cell_id', 'cell_size', 'donor', 'Cluster', 'Cluster_preprocessed'
     obsm: 'spatial', 'node_types',
 AnnData object with n_obs × n_vars = 768 × 36
     obs: 'point', 'cell_id', 'cell_size', 'donor', 'Cluster', 'Cluster_preprocessed'
     obsm: 'spatial', 'node_types',
 AnnData object with n_obs × n_vars = 1020 × 36
     obs: 'point', 'cell_id', 'cell_size', 'donor', 'Cluster', 'Cluster_preprocessed'
     obsm: 'spatial', 'node_types',
 AnnData object with n_obs × n_vars = 2100 × 36
     obs: 'point', 'cell_id', 'cell_size', 'donor', 'Cluster', 'Cluster_preprocessed'
     obsm: 'spatial', 'node_types',
 AnnData object with n_obs × n_vars = 1325 × 36
     obs: 'point', 'cell_id', 'cell_size', 'donor', 'Cluster', 'Cluster_preprocessed'
     obsm: 'spatia

In [13]:
# Inspect the 'Cluster' column of the first split AnnData to see which
# values and categories it carries after splitting.
split_adatas[0].obs['Cluster']

59191     Imm_other
59192     Tcell_CD4
59193     Imm_other
59194     Imm_other
59195     Imm_other
            ...    
60524     Imm_other
60525     Imm_other
60526    Fibroblast
60527     Imm_other
60528     Imm_other
Name: Cluster, Length: 1338, dtype: category
Categories (8, object): ['Endothelial', 'Epithelial', 'Fibroblast', 'Imm_other', 'Myeloid_CD11c', 'Myeloid_CD68', 'Tcell_CD4', 'Tcell_CD8']

In [14]:
# Preview the first split's 'Cluster' column re-coded with the categories of
# the merged AnnData. `set_categories` returns a new Series and the result is
# not assigned, so `split_adatas[0]` itself is left unchanged by this cell.
split_adatas[0].obs['Cluster'].cat.set_categories(adata.obs['Cluster'].cat.categories)

59191     Imm_other
59192     Tcell_CD4
59193     Imm_other
59194     Imm_other
59195     Imm_other
            ...    
60524     Imm_other
60525     Imm_other
60526    Fibroblast
60527     Imm_other
60528     Imm_other
Name: Cluster, Length: 1338, dtype: category
Categories (8, object): ['Endothelial', 'Epithelial', 'Fibroblast', 'Imm_other', 'Myeloid_CD11c', 'Myeloid_CD68', 'Tcell_CD4', 'Tcell_CD8']

In [15]:
# Display the first split's 'Cluster' column again; the preceding
# `set_categories` call was not assigned back, so the stored column is the
# same one shown before.
split_adatas[0].obs['Cluster']

59191     Imm_other
59192     Tcell_CD4
59193     Imm_other
59194     Imm_other
59195     Imm_other
            ...    
60524     Imm_other
60525     Imm_other
60526    Fibroblast
60527     Imm_other
60528     Imm_other
Name: Cluster, Length: 1338, dtype: category
Categories (8, object): ['Endothelial', 'Epithelial', 'Fibroblast', 'Imm_other', 'Myeloid_CD11c', 'Myeloid_CD68', 'Tcell_CD4', 'Tcell_CD8']