# Previewing Shapes of Fields
This example requires openpyxl to be installed. You can install it with pip:
```bash
pip install openpyxl
```

In [1]:
# Enable IPython's autoreload extension so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import anndata as ad
from geome import transforms, ann2data
from utils.datasets import DatasetHartmann
import warnings

## Setting up the experiment

In [3]:
# Obs column whose distinct values define the items a2d iterates over.
category_to_iterate = "point"

# Maps each attribute of the generated objects to the AnnData location(s) it is
# read from. "x" lists two obs columns; the other fields have a single source.
fields = dict(
    x=["obs/Cluster_preprocessed", "obs/donor"],
    edge_index=["uns/edge_index"],
    edge_weight=["uns/edge_weight"],
    y=["X"],
)

# Preprocessing pipeline: a single step that categorizes the two obs columns
# that are used as features in "x".
preprocess = transforms.Compose(
    [transforms.Categorize(["Cluster_preprocessed", "donor"], axis="obs")]
)

# Transform that adds the edge index / edge weight entries referenced by
# `fields` (written under the "edge_index" and "edge_weight" keys).
# NOTE(review): func_args is presumably forwarded to the neighbor-graph
# builder, n_neighs=4 giving 4 neighbors per node -- confirm against geome.
transform = transforms.AddEdgeIndex(
    edge_index_key="edge_index",
    edge_weight_key="edge_weight",
    key_added="spatial",
    spatial_key="spatial",
    func_args=dict(n_neighs=4),
)

# Converter wiring the pieces above together.
a2d = ann2data.Ann2DataByCategory(
    fields=fields,
    category=category_to_iterate,
    preprocess=preprocess,
    transform=transform,
)

a2d  # sizes stay '?' until next(a2d(adata)) has been called at least once

x:
    -----------------------------------
    obs/Cluster_preprocessed ('?',)
    obs/donor ('?',)
    -----------------------------------
edge_index:
    -----------------------------------
    uns/edge_index ('?',)
    -----------------------------------
edge_weight:
    -----------------------------------
    uns/edge_weight ('?',)
    -----------------------------------
y:
    -----------------------------------
    X ('?',)
    -----------------------------------

## Load data

In [4]:
# Mibitof (Hartmann dataset): load from the raw files, silencing any warnings
# emitted while the dataset is being built.
with warnings.catch_warnings():
    warnings.simplefilter(action="ignore")
    dataset = DatasetHartmann(data_path="./example_data/hartmann/")
    adatas = [*dataset.img_celldata.values()]

# Merge the per-image AnnData objects into a single AnnData.
# NOTE(review): no string-to-category conversion happens in this cell; it is
# presumably left to the `preprocess` pipeline configured earlier -- confirm.
adata = ad.concat(adatas)

Loading data from raw files
registering celldata
collecting image-wise celldata
adding graph-level covariates
Loaded 58 images with complete data from 4 patients over 63747 cells with 36 cell features and 8 distinct celltypes.


### Checking some dimensions
This shows the sizes of the tensors it last saw. If a field has more than one location, only the last dimension of each location is shown.

In [5]:
# Draw a single item from a2d(adata); once one item has been produced, the
# a2d repr reports the shapes it saw instead of '?' placeholders.
next(a2d(adata))  # will show the sizes of the fields
a2d  # trailing expression: display the repr with the recorded shapes

x:
    -----------------------------------
    obs/Cluster_preprocessed ('-', 8)
    obs/donor ('-', 4)
    -----------------------------------
edge_index:
    -----------------------------------
    uns/edge_index (2, 5352)
    -----------------------------------
edge_weight:
    -----------------------------------
    uns/edge_weight (5352,)
    -----------------------------------
y:
    -----------------------------------
    X (1338, 36)
    -----------------------------------

In [6]:
# Run the full conversion and display every generated item.
# NOTE(review): a2d(adata) is used with next() earlier in this notebook, so it
# looks like a one-shot iterator; if so, `datas` is exhausted after list() and
# must be recreated before it can be iterated again -- confirm.
datas = a2d(adata)
list(datas)

[Data(x=[1338, 12], edge_index=[2, 5352], y=[1338, 36], edge_weight=[5352]),
 Data(x=[311, 12], edge_index=[2, 1244], y=[311, 36], edge_weight=[1244]),
 Data(x=[768, 12], edge_index=[2, 3072], y=[768, 36], edge_weight=[3072]),
 Data(x=[1020, 12], edge_index=[2, 4080], y=[1020, 36], edge_weight=[4080]),
 Data(x=[2100, 12], edge_index=[2, 8400], y=[2100, 36], edge_weight=[8400]),
 Data(x=[1325, 12], edge_index=[2, 5300], y=[1325, 36], edge_weight=[5300]),
 Data(x=[1091, 12], edge_index=[2, 4364], y=[1091, 36], edge_weight=[4364]),
 Data(x=[1046, 12], edge_index=[2, 4184], y=[1046, 36], edge_weight=[4184]),
 Data(x=[618, 12], edge_index=[2, 2472], y=[618, 36], edge_weight=[2472]),
 Data(x=[61, 12], edge_index=[2, 244], y=[61, 36], edge_weight=[244]),
 Data(x=[1316, 12], edge_index=[2, 5264], y=[1316, 36], edge_weight=[5264]),
 Data(x=[1540, 12], edge_index=[2, 6160], y=[1540, 36], edge_weight=[6160]),
 Data(x=[1822, 12], edge_index=[2, 7288], y=[1822, 36], edge_weight=[7288]),
 Data(x=[86