# DPT benchmark on dyngen data

This notebook benchmarks latent time inference using diffusion pseudotime (DPT) on dyngen-generated data.

## Library imports

In [None]:
import numpy as np
import pandas as pd

import anndata as ad
import scanpy as sc

from rgv_tools import DATA_DIR
from rgv_tools.benchmarking import get_time_correlation

## Constants

In [None]:
# Name of the sub-directory under DATA_DIR that holds this benchmark's inputs and results.
DATASET = "dyngen"

In [None]:
# Toggle for persisting results; when enabled, make sure the output directory exists.
SAVE_DATA = True
if SAVE_DATA:
    results_dir = DATA_DIR / DATASET / "results"
    # Create missing parent directories and do not fail if the directory is already there.
    results_dir.mkdir(parents=True, exist_ok=True)

## DPT pipeline

In [None]:
# One correlation score per processed dataset, in directory-iteration order.
time_correlation = []

processed_dir = DATA_DIR / DATASET / "processed"
for filename in processed_dir.iterdir():
    # Only entries with a ``.zarr`` suffix are loaded; everything else is ignored.
    if filename.suffix == ".zarr":
        adata = ad.io.read_zarr(filename)

        # Root cell for DPT: the first cell whose ground-truth time equals zero.
        is_start_cell = adata.obs["true_time"] == 0
        adata.uns["iroot"] = np.flatnonzero(is_start_cell)[0]

        # Neighbor graph -> diffusion map -> diffusion pseudotime.
        sc.pp.neighbors(adata)
        sc.tl.diffmap(adata)
        sc.tl.dpt(adata)

        # Compare the inferred pseudotime against the simulated ground truth.
        score = get_time_correlation(
            ground_truth=adata.obs["true_time"],
            estimated=adata.obs["dpt_pseudotime"].values,
        )
        time_correlation.append(score)

## Data saving

In [None]:
# Persist the per-dataset correlation scores as a single-column ("time") Parquet table.
if SAVE_DATA:
    correlation_df = pd.DataFrame({"time": time_correlation})
    correlation_df.to_parquet(path=DATA_DIR / DATASET / "results" / "dpt_correlation.parquet")