# cell2fate benchmark on dyngen data

This notebook benchmarks velocity and latent time inference using cell2fate on dyngen-generated data.

Note that cell2fate requires `anndata==0.8.0` and `scvi-tools==0.16.1`.

## Library imports

In [1]:
import contextlib
import io

import numpy as np
import pandas as pd
import scipy
import torch

import anndata as ad
import cell2fate as c2f
import scanpy as sc

from pathlib import Path

from typing import Callable, Union
from numpy.typing import ArrayLike

2025-04-28 21:45:09.738523: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-04-28 21:46:06.193779: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2025-04-28 21:46:06.207855: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory
Global seed set to 0


In [2]:
# Root directory of the benchmark data; dataset inputs are read from and results are written to subdirectories of it.
# NOTE(review): absolute, cluster-specific path -- adjust when running the notebook elsewhere.
DATA_DIR = Path('/lustre/groups/ml01/workspace/yifan.chen/regvelo_reproducibility/data')

## Function definitions

In [3]:
# Train a cell2fate model on one dataset and attach its outputs to the data object
def train_c2f_model(adata):
    """Fit the cell2fate dynamical model and store its velocity estimate.

    Parameters
    ----------
    adata
        Annotated data object. Its `"spliced_raw"` and `"unspliced_raw"` layers are registered with the model, and its
        `"spliced"` layer is copied into `"Ms"`.

    Returns
    -------
    The data object returned by cell2fate's posterior export and module summary steps, extended with the layers
    `"Spliced Mean"`, `"velocity"` and `"Ms"`.
    """
    c2f.Cell2fate_DynamicalModel.setup_anndata(adata, spliced_label="spliced_raw", unspliced_label="unspliced_raw")
    model = c2f.Cell2fate_DynamicalModel(adata, n_modules=c2f.utils.get_max_modules(adata))
    model.train()

    posterior_kwargs = {"batch_size": None, "num_samples": 30, "return_samples": True, "use_gpu": False}
    adata = model.export_posterior(adata, sample_kwargs=posterior_kwargs)
    adata = model.compute_module_summary_statistics(adata)

    with contextlib.redirect_stdout(io.StringIO()):
        posterior_means = model.samples["post_sample_means"]
        # Last-axis index 1 is stored as the spliced mean; index 0 is presumably the unspliced mean -- TODO confirm.
        mu_first = posterior_means["mu_expression"][..., 0]
        mu_spliced = posterior_means["mu_expression"][..., 1]

        adata.layers["Spliced Mean"] = mu_spliced

        # Velocity estimate: beta_g * mu[..., 0] - gamma_g * mu[..., 1], evaluated at the posterior means.
        beta_term = torch.tensor(posterior_means["beta_g"]) * mu_first
        gamma_term = torch.tensor(posterior_means["gamma_g"]) * mu_spliced
        adata.layers["velocity"] = (beta_term - gamma_term).numpy()

    adata.layers["Ms"] = adata.layers["spliced"].copy()
    return adata

In [4]:
def pearsonr(x: ArrayLike, y: ArrayLike, axis: int = 0) -> ArrayLike:
    """Pearson correlation coefficient of `x` and `y` taken along one axis.

    Parameters
    ----------
    x
        First array of observations.
    y
        Second array of observations; combined element-wise with `x`.
    axis
        Axis that is reduced when computing the correlation.

    Returns
    -------
    Correlation coefficients with `axis` reduced away.
    """
    # Deviations from the mean along the reduced axis; `keepdims` lets the means broadcast back.
    x_dev = x - np.mean(x, axis=axis, keepdims=True)
    y_dev = y - np.mean(y, axis=axis, keepdims=True)

    cross_sum = (x_dev * y_dev).sum(axis=axis)
    x_sq_sum = np.add.reduce(x_dev * x_dev, axis=axis)
    y_sq_sum = np.add.reduce(y_dev * y_dev, axis=axis)

    return cross_sum / np.sqrt(x_sq_sum * y_sq_sum)

In [5]:
def get_velocity_correlation(
    ground_truth: ArrayLike, estimated: ArrayLike, aggregation: Union[Callable, None], axis: int = 0
) -> Union[ArrayLike, float]:
    """Compute Pearson correlation between ground truth and estimated values.

    Parameters
    ----------
    ground_truth
        Array of ground truth values.
    estimated
        Array of estimated values.
    aggregation
        If `None`, the function returns every pairwise correlation between ground truth and the estimate. If it is a
        function, the correlations are aggregated accordingly.
    axis
        Axis along which ground truth and estimate are compared.

    Returns
    -------
    Axis-wise Pearson correlations potentially aggregated.

    Raises
    ------
    TypeError
        If `aggregation` is neither `None` nor callable.
    """
    # Validate up front so that an unsupported argument fails loudly instead of silently yielding `None`.
    if aggregation is not None and not callable(aggregation):
        raise TypeError(f"`aggregation` must be `None` or a callable, got {type(aggregation).__name__}.")

    correlation = pearsonr(ground_truth, estimated, axis=axis)

    if aggregation is None:
        return correlation
    return aggregation(correlation)

## Constants

In [6]:
# Name of the dataset subdirectory below DATA_DIR.
DATASET = "dyngen"

In [7]:
# Name of the complexity-level subdirectory below DATA_DIR / DATASET.
COMPLEXITY = "complexity_1"

In [8]:
# Toggle for persisting benchmark results; the results directory is created up front when enabled.
SAVE_DATA = True
if SAVE_DATA:
    (DATA_DIR / DATASET / COMPLEXITY / "results").mkdir(parents=True, exist_ok=True)

In [9]:
# Toggle for the per-simulation trained datasets; their target directory is created up front when enabled.
SAVE_DATASETS = True
if SAVE_DATASETS:
    (DATA_DIR / DATASET / COMPLEXITY / "trained_cell2fate").mkdir(parents=True, exist_ok=True)

## Velocity pipeline

In [10]:
import os

# One aggregated velocity correlation per processed simulation, appended in the order the files are visited.
velocity_correlation = []

cnt = 0
# NOTE: `Path.iterdir` yields entries in arbitrary order, so positions in `velocity_correlation` do not
# correspond to simulation IDs.
for filename in (DATA_DIR / DATASET / COMPLEXITY / "processed").iterdir():
    torch.cuda.empty_cache()
    # Only zarr stores are datasets; skip anything else in the directory.
    if filename.suffix != ".zarr":
        continue

    simulation_id = int(filename.stem.removeprefix("simulation_"))
    print(f"Run {cnt}, dataset {simulation_id}.")

    adata = ad.read_zarr(filename)

    # cell2fate needs cluster information
    sc.tl.leiden(adata)

    adata = c2f.utils.get_training_data(
        adata,
        cells_per_cluster=10**5,
        cluster_column="leiden",
        remove_clusters=[],
    )

    adata = train_c2f_model(adata)

    # Save the trained dataset only when requested; the target directory is only created when SAVE_DATASETS is set.
    if SAVE_DATASETS:
        adata.write_zarr(DATA_DIR / DATASET / COMPLEXITY / "trained_cell2fate" / f"trained_{simulation_id}.zarr")

    # Mean of the correlations computed along axis 0 between true and inferred velocity.
    velocity_correlation.append(
        get_velocity_correlation(
            ground_truth=adata.layers["true_velocity"], estimated=adata.layers["velocity"], aggregation=np.mean
        )
    )
    cnt += 1

Run 0, dataset 29.
Keeping at most 100000 cells per cluster
Filtered out 8 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.


No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


Leiden clustering ...
Number of Leiden Clusters: 3
Maximal Number of Modules: 3


  from torch.distributed._sharded_tensor import pre_load_state_dict_hook, state_dict_hook
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Epoch 500/500: 100%|██████████| 500/500 [01:20<00:00,  6.22it/s, v_num=1, elbo_train=2.89e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:02<00:00,  2.36s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:00<00:00, 35.83it/s]
Run 1, dataset 14.
Keeping at most 100000 cells per cluster
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 6
Maximal Number of Modules: 6
Epoch 500/500: 100%|██████████| 500/500 [01:01<00:00,  8.07it/s, v_num=1, elbo_train=9.92e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:02<00:00,  2.67s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:02<00:00, 12.57it/s]
Run 2, dataset 24.
Keeping at most 100000 cells per cluster
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 19
Maximal Number of Modules: 21
Epoch 500/500: 100%|██████████| 500/500 [01:48<00:00,  4.60it/s, v_num=1, elbo_train=8.38e+6]
Sampling local variables, batch: 100%|██████████| 1/1 [00:07<00:00,  7.07s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:06<00:00,  4.65it/s]
Run 3, dataset 28.
Keeping at most 100000 cells per cluster
Filtered out 26 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 5
Maximal Number of Modules: 5
Epoch 500/500: 100%|██████████| 500/500 [00:57<00:00,  8.74it/s, v_num=1, elbo_train=5.61e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:01<00:00,  1.82s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 17.93it/s]
Run 4, dataset 6.
Keeping at most 100000 cells per cluster
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 16
Maximal Number of Modules: 18
Epoch 500/500: 100%|██████████| 500/500 [01:33<00:00,  5.33it/s, v_num=1, elbo_train=7.98e+6]
Sampling local variables, batch: 100%|██████████| 1/1 [00:05<00:00,  5.84s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:05<00:00,  5.18it/s]
Run 5, dataset 21.
Keeping at most 100000 cells per cluster
Filtered out 6 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 7
Maximal Number of Modules: 8
Epoch 500/500: 100%|██████████| 500/500 [01:03<00:00,  7.91it/s, v_num=1, elbo_train=7.08e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:02<00:00,  2.02s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 15.70it/s]
Run 6, dataset 15.
Keeping at most 100000 cells per cluster
Filtered out 3 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 7
Maximal Number of Modules: 8
Epoch 500/500: 100%|██████████| 500/500 [01:04<00:00,  7.73it/s, v_num=1, elbo_train=5.5e+5] 
Sampling local variables, batch: 100%|██████████| 1/1 [00:01<00:00,  1.90s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:02<00:00, 14.23it/s]
Run 7, dataset 9.
Keeping at most 100000 cells per cluster
Filtered out 4 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 10
Maximal Number of Modules: 11
Epoch 500/500: 100%|██████████| 500/500 [01:14<00:00,  6.73it/s, v_num=1, elbo_train=3.42e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:02<00:00,  2.22s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 14.60it/s]
Run 8, dataset 12.
Keeping at most 100000 cells per cluster
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 16
Maximal Number of Modules: 18
Epoch 500/500: 100%|██████████| 500/500 [01:33<00:00,  5.34it/s, v_num=1, elbo_train=3.47e+6]
Sampling local variables, batch: 100%|██████████| 1/1 [00:03<00:00,  3.71s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:03<00:00,  8.46it/s]
Run 9, dataset 19.
Keeping at most 100000 cells per cluster
Filtered out 9 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 10
Maximal Number of Modules: 11
Epoch 500/500: 100%|██████████| 500/500 [01:14<00:00,  6.74it/s, v_num=1, elbo_train=2.92e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:01<00:00,  1.64s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 18.92it/s]
Run 10, dataset 4.
Keeping at most 100000 cells per cluster
Filtered out 4 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 4
Maximal Number of Modules: 4
Epoch 500/500: 100%|██████████| 500/500 [00:53<00:00,  9.40it/s, v_num=1, elbo_train=3.95e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:01<00:00,  1.54s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 20.82it/s]
Run 11, dataset 13.
Keeping at most 100000 cells per cluster
Filtered out 12 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 5
Maximal Number of Modules: 5
Epoch 500/500: 100%|██████████| 500/500 [00:56<00:00,  8.81it/s, v_num=1, elbo_train=3.75e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:01<00:00,  1.84s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 17.96it/s]
Run 12, dataset 2.
Keeping at most 100000 cells per cluster
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 16
Maximal Number of Modules: 18
Epoch 500/500: 100%|██████████| 500/500 [01:34<00:00,  5.32it/s, v_num=1, elbo_train=6.81e+6]
Sampling local variables, batch: 100%|██████████| 1/1 [00:05<00:00,  5.27s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:04<00:00,  6.12it/s]
Run 13, dataset 16.
Keeping at most 100000 cells per cluster
Filtered out 4 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 5
Maximal Number of Modules: 5
Epoch 500/500: 100%|██████████| 500/500 [00:58<00:00,  8.62it/s, v_num=1, elbo_train=5.78e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:01<00:00,  1.75s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 18.25it/s]
Run 14, dataset 1.
Keeping at most 100000 cells per cluster
Filtered out 16 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 7
Maximal Number of Modules: 8
Epoch 500/500: 100%|██████████| 500/500 [01:02<00:00,  8.05it/s, v_num=1, elbo_train=2.64e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:01<00:00,  1.55s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 20.53it/s]
Run 15, dataset 18.
Keeping at most 100000 cells per cluster
Filtered out 2 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 7
Maximal Number of Modules: 8
Epoch 500/500: 100%|██████████| 500/500 [01:02<00:00,  8.03it/s, v_num=1, elbo_train=4.66e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:01<00:00,  1.72s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 15.20it/s]
Run 16, dataset 5.
Keeping at most 100000 cells per cluster
Filtered out 27 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 18
Maximal Number of Modules: 20
Epoch 500/500: 100%|██████████| 500/500 [01:42<00:00,  4.88it/s, v_num=1, elbo_train=2.72e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:02<00:00,  2.52s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:02<00:00, 12.56it/s]
Run 17, dataset 10.
Keeping at most 100000 cells per cluster
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 18
Maximal Number of Modules: 20
Epoch 500/500: 100%|██████████| 500/500 [01:42<00:00,  4.90it/s, v_num=1, elbo_train=5.16e+6]
Sampling local variables, batch: 100%|██████████| 1/1 [00:05<00:00,  5.36s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:04<00:00,  6.30it/s]
Run 18, dataset 8.
Keeping at most 100000 cells per cluster
Filtered out 16 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 5
Maximal Number of Modules: 5
Epoch 500/500: 100%|██████████| 500/500 [00:57<00:00,  8.73it/s, v_num=1, elbo_train=4.02e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:01<00:00,  1.59s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 20.33it/s]
Run 19, dataset 11.
Keeping at most 100000 cells per cluster
Filtered out 4 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 10
Maximal Number of Modules: 11
Epoch 500/500: 100%|██████████| 500/500 [01:14<00:00,  6.74it/s, v_num=1, elbo_train=3.87e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:02<00:00,  2.24s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:02<00:00, 13.46it/s]
Run 20, dataset 27.
Keeping at most 100000 cells per cluster
Filtered out 5 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 8
Maximal Number of Modules: 9
Epoch 500/500: 100%|██████████| 500/500 [01:05<00:00,  7.67it/s, v_num=1, elbo_train=5.14e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:01<00:00,  1.84s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 16.81it/s]
Run 21, dataset 23.
Keeping at most 100000 cells per cluster
Filtered out 12 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 10
Maximal Number of Modules: 11
Epoch 500/500: 100%|██████████| 500/500 [01:13<00:00,  6.80it/s, v_num=1, elbo_train=2.98e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:02<00:00,  2.11s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 14.95it/s]
Run 22, dataset 17.
Keeping at most 100000 cells per cluster
Filtered out 29 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 4
Maximal Number of Modules: 4
Epoch 500/500: 100%|██████████| 500/500 [00:53<00:00,  9.36it/s, v_num=1, elbo_train=3.27e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:01<00:00,  1.43s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 22.26it/s]
Run 23, dataset 30.
Keeping at most 100000 cells per cluster
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 11
Maximal Number of Modules: 12
Epoch 500/500: 100%|██████████| 500/500 [01:18<00:00,  6.40it/s, v_num=1, elbo_train=1.24e+6]
Sampling local variables, batch: 100%|██████████| 1/1 [00:02<00:00,  2.87s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:02<00:00, 10.88it/s]
Run 24, dataset 22.
Keeping at most 100000 cells per cluster
Filtered out 38 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 11
Maximal Number of Modules: 12
Epoch 500/500: 100%|██████████| 500/500 [01:16<00:00,  6.54it/s, v_num=1, elbo_train=3.12e+5]
Sampling local variables, batch: 100%|██████████| 1/1 [00:02<00:00,  2.48s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:02<00:00, 12.45it/s]
Run 25, dataset 25.
Keeping at most 100000 cells per cluster
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 12
Maximal Number of Modules: 13
Epoch 500/500: 100%|██████████| 500/500 [01:21<00:00,  6.12it/s, v_num=1, elbo_train=1.87e+6]
Sampling local variables, batch: 100%|██████████| 1/1 [00:03<00:00,  3.68s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:03<00:00,  8.60it/s]
Run 26, dataset 20.
Keeping at most 100000 cells per cluster
Filtered out 11 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 3
Maximal Number of Modules: 3
Epoch 500/500: 100%|██████████| 500/500 [00:47<00:00, 10.44it/s, v_num=1, elbo_train=2e+5]   
Sampling local variables, batch: 100%|██████████| 1/1 [00:00<00:00,  1.14it/s]
Sampling global variables, sample: 100%|██████████| 29/29 [00:00<00:00, 37.04it/s]
Run 27, dataset 7.
Keeping at most 100000 cells per cluster
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 8
Maximal Number of Modules: 9
Epoch 500/500: 100%|██████████| 500/500 [01:06<00:00,  7.53it/s, v_num=1, elbo_train=1.3e+6] 
Sampling local variables, batch: 100%|██████████| 1/1 [00:02<00:00,  2.19s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 14.55it/s]
Run 28, dataset 3.
Keeping at most 100000 cells per cluster
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 16
Maximal Number of Modules: 18
Epoch 500/500: 100%|██████████| 500/500 [01:32<00:00,  5.41it/s, v_num=1, elbo_train=2.25e+6]
Sampling local variables, batch: 100%|██████████| 1/1 [00:01<00:00,  1.97s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 15.81it/s]
Run 29, dataset 26.
Keeping at most 100000 cells per cluster
Filtered out 18 genes that are detected 10 counts (shared).
Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Leiden clustering ...


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Number of Leiden Clusters: 4
Maximal Number of Modules: 4
Epoch 500/500: 100%|██████████| 500/500 [00:53<00:00,  9.34it/s, v_num=1, elbo_train=4e+5]   
Sampling local variables, batch: 100%|██████████| 1/1 [00:01<00:00,  1.51s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 21.12it/s]


In [12]:
# Store the collected velocity correlations as a single-column parquet table; rows follow the order in which the
# datasets were processed.
if SAVE_DATA:
    pd.DataFrame({"velocity": velocity_correlation}).to_parquet(
        path=DATA_DIR / DATASET / COMPLEXITY / "results" / "cell2fate_correlation.parquet"
    )