In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
import pytorch_lightning as pl
import torch
import anndata as ad
from gpu_spatial_graph_pipeline.data.utils import transforms
from gpu_spatial_graph_pipeline.data.anndata2data import AnnData2DataByCategory
from gpu_spatial_graph_pipeline.data.datasets import DatasetHartmann
from gpu_spatial_graph_pipeline.models.linear_ncem import LinearNCEM
from gpu_spatial_graph_pipeline.data.datamodule import GraphAnnDataModule

In [6]:
fields = {
    'x':['obs/Cluster_preprocessed','obs/donor','obsm/design_matrix'],
    'y':['X']
}

preprocess = [
    lambda x,_: transforms.add_design_matrix(x,'obs/Cluster_preprocessed','obs/donor','design_matrix')
]

category_to_iterate = 'point'

a2d = AnnData2DataByCategory(
    fields=fields,
    category=category_to_iterate,
    preprocess=preprocess,
    yields_edge_index=True,
)

#Mibitof
dataset = DatasetHartmann(data_path='./example_data/hartmann/')
adatas = list(dataset.img_celldata.values())


# Merge the list of adatas and convert some string to categories as they should be
adata = ad.concat(adatas)
to_categorize = ['donor', 'Cluster_preprocessed', 'point']
for c in to_categorize:
    adata.obs[c] = adata.obs[c].astype('category')

datas = a2d(adata)
datas

Loading data from raw files
registering celldata




collecting image-wise celldata
adding graph-level covariates
Loaded 58 images with complete data from 4 patients over 63747 cells with 36 cell features and 8 distinct celltypes.


[Data(x=[1338, 88], edge_index=[2, 8028], y=[1338, 36]),
 Data(x=[61, 88], edge_index=[2, 366], y=[61, 36]),
 Data(x=[1316, 88], edge_index=[2, 7896], y=[1316, 36]),
 Data(x=[1540, 88], edge_index=[2, 9240], y=[1540, 36]),
 Data(x=[1822, 88], edge_index=[2, 10932], y=[1822, 36]),
 Data(x=[863, 88], edge_index=[2, 5178], y=[863, 36]),
 Data(x=[564, 88], edge_index=[2, 3384], y=[564, 36]),
 Data(x=[1023, 88], edge_index=[2, 6138], y=[1023, 36]),
 Data(x=[324, 88], edge_index=[2, 1944], y=[324, 36]),
 Data(x=[287, 88], edge_index=[2, 1722], y=[287, 36]),
 Data(x=[636, 88], edge_index=[2, 3816], y=[636, 36]),
 Data(x=[311, 88], edge_index=[2, 1866], y=[311, 36]),
 Data(x=[890, 88], edge_index=[2, 5340], y=[890, 36]),
 Data(x=[1235, 88], edge_index=[2, 7410], y=[1235, 36]),
 Data(x=[1020, 88], edge_index=[2, 6120], y=[1020, 36]),
 Data(x=[1241, 88], edge_index=[2, 7446], y=[1241, 36]),
 Data(x=[1438, 88], edge_index=[2, 8628], y=[1438, 36]),
 Data(x=[1021, 88], edge_index=[2, 6126], y=[1021

In [7]:
num_features = datas[0].x.shape[1]
out_channels = datas[0].y.shape[1]

In [11]:
dm = GraphAnnDataModule(adata=adata, adata2data_fn=a2d, num_workers = 12, batch_size=10,learning_type='graph')
model = LinearNCEM(in_channels=num_features,out_channels=out_channels, model_type='nonspatial', lr=0.0001,weight_decay=0.000001)

In [12]:
trainer:pl.Trainer = pl.Trainer(accelerator='gpu' if torch.torch.cuda.is_available() else 'cpu',
                                max_epochs=100,log_every_n_steps=10)



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [13]:
trainer.fit(model,datamodule=dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type             | Params
-------------------------------------------------
0 | model_sigma | LinearNonSpatial | 3.2 K 
1 | model_mu    | LinearNonSpatial | 3.2 K 
2 | loss_module | GaussianNLLLoss  | 0     
-------------------------------------------------
6.4 K     Trainable params
0         Non-trainable params
6.4 K     Total params
0.026     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

AttributeError: 'LinearNCEM' object has no attribute 'batch_size'

In [None]:
trainer.test(model, datamodule=dm)

In [None]:
x = ad.concat(adata)

In [None]:
x.obs.donor

59191    90de
59192    90de
59193    90de
59194    90de
59195    90de
         ... 
18510    90de
18511    90de
18512    90de
18513    90de
18514    90de
Name: donor, Length: 63747, dtype: object

In [None]:
cats = x.obs.donor.unique()
for cat in cats:
    print(x[x.obs.donor == cat])

View of AnnData object with n_obs × n_vars = 18943 × 36
    obs: 'point', 'cell_id', 'cell_size', 'donor', 'Cluster', 'Cluster_preprocessed'
    obsm: 'spatial', 'node_types'
View of AnnData object with n_obs × n_vars = 22224 × 36
    obs: 'point', 'cell_id', 'cell_size', 'donor', 'Cluster', 'Cluster_preprocessed'
    obsm: 'spatial', 'node_types'
View of AnnData object with n_obs × n_vars = 5811 × 36
    obs: 'point', 'cell_id', 'cell_size', 'donor', 'Cluster', 'Cluster_preprocessed'
    obsm: 'spatial', 'node_types'
View of AnnData object with n_obs × n_vars = 16769 × 36
    obs: 'point', 'cell_id', 'cell_size', 'donor', 'Cluster', 'Cluster_preprocessed'
    obsm: 'spatial', 'node_types'


In [None]:
x.obs.donor.unique()

59191    90de
59192    90de
59193    90de
59194    90de
59195    90de
         ... 
18510    90de
18511    90de
18512    90de
18513    90de
18514    90de
Name: donor, Length: 63747, dtype: object