# Non-Linear NCEM Example

In [1]:
# Enable IPython's autoreload extension so edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules (except those excluded with %aimport) before each cell runs.
%autoreload 2

In [2]:
import pytorch_lightning as pl
import torch
import anndata as ad
from geome import transforms
from geome.adata2data import AnnData2DataByCategory
from utils.datasets import DatasetHartmann
from utils.models.non_linear_ncem import NonLinearNCEM
from geome.datamodule import GraphAnnDataModule



	geopandas.options.use_pygeos = True

If you intended to use PyGEOS, set the option to False.
  _check_geopandas_using_shapely()


In [3]:
# Transform classes used in this cell (kept local to the cell, as in the original notebook).
from geome.transforms import Categorize, AddDesignMatrix, Compose, AddAdjMatrix, AddEdgeIndex

# --- Conversion settings ---------------------------------------------------

# Location of the adjacency matrix that both transforms below are pointed at.
adj_matrix_loc = 'obsp/adjacency_matrix_connectivities'

# One graph is produced per distinct value of this category.
category_to_iterate = 'point'

# Output attribute -> AnnData location(s) it is read from.
# NOTE(review): in the saved run `x` is 12 wide, which would match 8 cell types + 4 donors
# encoded per node -- presumably one-hot; confirm against AnnData2DataByCategory.
fields = {
    'x': ['obs/Cluster_preprocessed', 'obs/donor'],
    'edge_index': ['uns/edge_index'],
    'y': ['X'],
}

# --- Pre-processing and transforms -----------------------------------------

# Applied to the listed `obs` columns (string columns become categories).
preprocess = Categorize(
    ['donor', 'Cluster_preprocessed', 'point'],
    axis='obs',
)

# Adjacency matrix first, then the edge index derived from it under the 'edge_index' key.
transform = Compose(
    [
        AddAdjMatrix(location=adj_matrix_loc),
        AddEdgeIndex(
            adj_matrix_loc=adj_matrix_loc,
            edge_index_key='edge_index',
        ),
    ]
)

a2d = AnnData2DataByCategory(
    fields=fields,
    category=category_to_iterate,
    preprocess=preprocess,
    transform=transform,
)

# --- Load the Hartmann MIBI-TOF dataset and convert it ---------------------

dataset = DatasetHartmann(data_path='./example_data/hartmann/')
adatas = [*dataset.img_celldata.values()]

# Merge the per-image AnnData objects into a single AnnData before conversion.
adata = ad.concat(adatas)

datas = a2d(adata)
datas

Loading data from raw files
registering celldata




collecting image-wise celldata
adding graph-level covariates
Loaded 58 images with complete data from 4 patients over 63747 cells with 36 cell features and 8 distinct celltypes.


[Data(x=[1338, 12], edge_index=[2, 8028], y=[1338, 36]),
 Data(x=[311, 12], edge_index=[2, 1866], y=[311, 36]),
 Data(x=[768, 12], edge_index=[2, 4608], y=[768, 36]),
 Data(x=[1020, 12], edge_index=[2, 6120], y=[1020, 36]),
 Data(x=[2100, 12], edge_index=[2, 12600], y=[2100, 36]),
 Data(x=[1325, 12], edge_index=[2, 7950], y=[1325, 36]),
 Data(x=[1091, 12], edge_index=[2, 6546], y=[1091, 36]),
 Data(x=[1046, 12], edge_index=[2, 6276], y=[1046, 36]),
 Data(x=[618, 12], edge_index=[2, 3708], y=[618, 36]),
 Data(x=[61, 12], edge_index=[2, 366], y=[61, 36]),
 Data(x=[1316, 12], edge_index=[2, 7896], y=[1316, 36]),
 Data(x=[1540, 12], edge_index=[2, 9240], y=[1540, 36]),
 Data(x=[1822, 12], edge_index=[2, 10932], y=[1822, 36]),
 Data(x=[863, 12], edge_index=[2, 5178], y=[863, 36]),
 Data(x=[564, 12], edge_index=[2, 3384], y=[564, 36]),
 Data(x=[1023, 12], edge_index=[2, 6138], y=[1023, 36]),
 Data(x=[324, 12], edge_index=[2, 1944], y=[324, 36]),
 Data(x=[287, 12], edge_index=[2, 1722], y=[28

In [4]:
# Model input/output widths are read off the first graph:
# second dimension of its node features (x) and of its targets (y).
num_features, out_channels = datas[0].x.shape[1], datas[0].y.shape[1]
(num_features, out_channels)

(12, 36)

In [5]:
# Data module built from the converted graphs.
# NOTE(review): with learning_type='node' the saved traceback goes through PyG's
# node_loader / NeighborSampler -- presumably neighbor-sampled mini-batches; confirm.
dm = GraphAnnDataModule(
    datas=datas,
    num_workers=12,
    batch_size=100,
    learning_type='node',
)

# Non-linear NCEM sized from the feature/target widths computed earlier in the notebook.
model = NonLinearNCEM(
    in_channels=num_features,
    out_channels=out_channels,
    encoder_hidden_dims=[16],
    decoder_hidden_dims=[16],
    latent_dim=14,
    lr=0.001,
    weight_decay=0.00001,
)

In [6]:
# Lightning trainer: use the GPU when CUDA is available, otherwise fall back to the CPU.
# `torch.cuda.is_available()` is the public API; the previous `torch.torch.cuda...` only
# worked through an incidental self-referential attribute on the `torch` module.
trainer: pl.Trainer = pl.Trainer(
    accelerator='gpu' if torch.cuda.is_available() else 'cpu',
    max_epochs=100,
    log_every_n_steps=10,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [7]:
# Fit the model using the data module.
# NOTE(review): in the saved run this aborts during the sanity check with
# "'NeighborSampler' requires either 'pyg-lib' or 'torch-sparse'" raised inside a
# DataLoader worker, so one of those packages must be installed in the kernel environment.
trainer.fit(model,datamodule=dm)

Missing logger folder: /home/sel/projects/geome/geome/docs/notebooks/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type            | Params
--------------------------------------------------
0 | encoder       | GNNModel        | 446   
1 | decoder_sigma | MLPModel        | 852   
2 | decoder_mu    | MLPModel        | 852   
3 | loss_module   | GaussianNLLLoss | 0     
--------------------------------------------------
2.1 K     Trainable params
0         Non-trainable params
2.1 K     Total params
0.009     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

ImportError: Caught ImportError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/sel/mambaforge/envs/gnn/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/sel/mambaforge/envs/gnn/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch
    return self.collate_fn(data)
  File "/home/sel/mambaforge/envs/gnn/lib/python3.10/site-packages/torch_geometric/loader/node_loader.py", line 117, in collate_fn
    out = self.node_sampler.sample_from_nodes(input_data)
  File "/home/sel/mambaforge/envs/gnn/lib/python3.10/site-packages/torch_geometric/sampler/neighbor_sampler.py", line 174, in sample_from_nodes
    return node_sample(inputs, self._sample)
  File "/home/sel/mambaforge/envs/gnn/lib/python3.10/site-packages/torch_geometric/sampler/neighbor_sampler.py", line 358, in node_sample
    out = sample_fn(seed, seed_time)
  File "/home/sel/mambaforge/envs/gnn/lib/python3.10/site-packages/torch_geometric/sampler/neighbor_sampler.py", line 325, in _sample
    raise ImportError(f"'{self.__class__.__name__}' requires "
ImportError: 'NeighborSampler' requires either 'pyg-lib' or 'torch-sparse'


In [None]:
# Run Lightning's test loop for the model with the same data module.
# NOTE(review): this cell has no execution count and the fit cell above failed in the
# saved run, so the stored metrics may come from a different session -- re-run after a
# successful fit to confirm them.
trainer.test(model, datamodule=dm)

Testing: 0it [00:00, ?it/s]

[{'test_r2_score': -0.9155278940025104, 'test_loss': 43.77671432495117}]