# RegVelo benchmark on dyngen data

This notebook benchmarks velocity, latent time, and gene regulatory network (GRN) inference using RegVelo on dyngen-generated data.

## Library imports

In [None]:
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import roc_auc_score

import anndata as ad
from regvelo import REGVELOVI

from rgv_tools import DATA_DIR
from rgv_tools.benchmarking import get_time_correlation, get_velocity_correlation, set_output

## Constants

In [None]:
# Name of the dataset sub-directory under DATA_DIR that this notebook reads from and writes to.
DATASET = "dyngen"

In [None]:
# Switch controlling whether benchmark results are written to disk.
SAVE_DATA = True
if SAVE_DATA:
    # Create the results directory (and any missing parents) up front; no-op if it already exists.
    DATA_DIR.joinpath(DATASET, "results").mkdir(exist_ok=True, parents=True)

## Velocity pipeline

In [None]:
# One entry per benchmarked dataset, appended in directory-iteration order.
velocity_correlation = []
time_correlation = []
grn_correlation = []

for filename in (DATA_DIR / DATASET / "processed").iterdir():
    # Only ".zarr" entries are treated as datasets; everything else is skipped.
    if filename.suffix != ".zarr":
        continue

    # Release cached GPU memory before fitting a model on the next dataset.
    torch.cuda.empty_cache()

    adata = ad.io.read_zarr(filename)

    # All-ones gene-by-gene matrix passed as `W`
    # (presumably the prior regulatory graph, i.e. no edge excluded -- confirm against REGVELOVI).
    W = torch.ones([adata.n_vars, adata.n_vars])
    REGVELOVI.setup_anndata(adata, spliced_layer="Ms", unspliced_layer="Mu")
    vae = REGVELOVI(adata, W=W, t_max=20)
    vae.train()

    # The layers "velocity" and "fit_t" are read from `adata` below
    # (presumably written by `set_output` -- confirm against rgv_tools).
    set_output(adata, vae, n_samples=30)

    velocity_correlation.append(
        get_velocity_correlation(
            ground_truth=adata.layers["true_velocity"], estimated=adata.layers["velocity"], aggregation=np.mean
        )
    )
    time_correlation.append(
        get_time_correlation(ground_truth=adata.obs["true_time"], estimated=adata.layers["fit_t"].mean(axis=1))
    )

    grn_true = adata.uns["true_skeleton"]
    grn_sc_true = adata.uns["true_sc_grn"]

    # Run the Jacobian on whatever device the trained module lives on instead of
    # hard-coding "cuda:0", so the notebook also works on CPU-only or multi-GPU hosts.
    device = next(vae.module.parameters()).device
    grn_estimate = vae.module.v_encoder.GRN_Jacobian2(torch.tensor(adata.layers["Ms"]).to(device))
    grn_estimate = grn_estimate.cpu().detach().numpy()

    # Entries of the transposed skeleton equal to 1; the mask is the same for every
    # cell, so it is built once per dataset rather than twice per cell.
    edge_mask = np.array(grn_true.T) == 1
    # Upper bound on the number of top-scoring entries evaluated per cell.
    max_edges = 10_000

    grn_auroc = []
    for cell_id in range(adata.n_obs):
        # Boolean indexing returns a copy, so the in-place binarisation below
        # does not modify `adata.uns["true_sc_grn"]`.
        ground_truth = grn_sc_true[:, :, cell_id].T[edge_mask]
        ground_truth[ground_truth != 0] = 1

        estimated = grn_estimate[cell_id, :, :][edge_mask]

        # Keep the `number` highest-scoring entries together with their positions.
        number = min(max_edges, len(ground_truth))
        estimated, index = torch.topk(torch.tensor(estimated), number)

        # Convert explicitly to NumPy before indexing / handing over to scikit-learn.
        grn_auroc.append(roc_auc_score(ground_truth[index.numpy()], estimated.numpy()))
    grn_correlation.append(np.mean(grn_auroc))

## Data saving

In [None]:
if SAVE_DATA:
    # One row per benchmarked dataset. The per-dataset mean GRN AUROC is stored
    # alongside the velocity and time correlations (it was computed but never saved),
    # and the file is named after RegVelo so it does not clash with a
    # "velovi_correlation.parquet" written for another method.
    pd.DataFrame(
        {"velocity": velocity_correlation, "time": time_correlation, "grn": grn_correlation}
    ).to_parquet(path=DATA_DIR / DATASET / "results" / "regvelo_correlation.parquet")