In [None]:
from nbdev.showdoc import *
import numpy as np
import matplotlib.pyplot as plt
import torch
import FRED
# Select the compute backend in order of preference: CUDA GPU, Apple MPS, CPU.
# `torch.has_mps` is deprecated in favour of the `torch.backends.mps` API;
# `is_available()` checks that the MPS backend can actually be used at runtime.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print("Using device", device)
%load_ext autoreload
%autoreload 2

Using device mps


# Noise Stability Benchmarks

In [None]:
# Load single cell dataset from scvelo and get associated information
from FRED.datasets import double_helix, directed_swiss_roll_delayed, directed_sinh_branch, rnavelo, rnavelo_pcs
from FRED.data_processing import dataloader_from_ndarray, ManifoldWithVectorField
from torch.utils.data import DataLoader
import scvelo as scv
import os
from pathlib import Path
dataset_name = "pancreas"
# choose correct dataset
# Supported dataset names mapped to the name of the matching loader in
# `scv.datasets`. Names (not the functions themselves) are stored so that only
# the loader that is actually requested gets looked up.
scvelo_loader_names = {
    "bone marrow": "bonemarrow",
    "dentategyrus": "dentategyrus",
    "pancreas": "pancreas",
    "dentategyrus_lamanno": "dentategyrus_lamanno",
}
# Directory holding the cached, already-processed arrays for this dataset.
processed_dir = Path(f"../../data/{dataset_name}/processed")
Xpath = f"../../data/{dataset_name}/processed/X.txt"
if os.path.exists(Xpath):
    # Cache hit: read the arrays written by a previous run of this cell.
    X = np.loadtxt(Xpath)
    flow = np.loadtxt(processed_dir / "flow.txt")
    labels = np.loadtxt(processed_dir / "labels.txt")
    latent_time = np.loadtxt(processed_dir / "latent_time.txt")
else:
    # Fail with a clear message instead of a NameError on `adata` further down
    # when the dataset name matches none of the supported loaders.
    if dataset_name not in scvelo_loader_names:
        raise ValueError(
            f"Unknown dataset_name {dataset_name!r}; "
            f"expected one of {sorted(scvelo_loader_names)}"
        )
    adata = getattr(scv.datasets, scvelo_loader_names[dataset_name])()
    print("processing data with pcs")
    X, flow, labels, n_pcs = rnavelo_pcs(adata)
    # dynamical recovery
    scv.tl.recover_dynamics(adata, n_jobs=1)
    scv.tl.latent_time(adata)
    latent_time = adata.obs['latent_time'].to_numpy()
    # save the processed data to np txt files for ready loading in the future
    processed_dir.mkdir(parents=True, exist_ok=True)
    np.savetxt(processed_dir / "X.txt", X)
    np.savetxt(processed_dir / "flow.txt", flow)
    np.savetxt(processed_dir / "labels.txt", labels)
    np.savetxt(processed_dir / "latent_time.txt", latent_time)

In [None]:
import umap
from FRED.data_processing import dataloader_from_ndarray_V2
from FRED.embed import ManifoldFlowEmbedder
from FRED.trainers import Trainer, visualize_points
def noise_stability_test(X, flow, labels, noise_level = 0.1, method = "FRED"):
    """
    Tests the robustness of a method against noise added to both points and flows.
    """
    # add noise to points
    X_noisy = X + torch.rand_like(X)*noise_level*max(torch.linalg.norm(X,axis=1))
    # randomly reverse some percentage of the flow
    flow_noisy = flow
    chosen_idxs = torch.rand(len(flow)) > noise_level
    flow_noisy[chosen_idxs] *= -1
    match method:
        case "umap":
            # Fit UMAP (proxy for scvelo)
            reducer = umap.UMAP()
            umap_coords = torch.tensor(reducer.fit_transform(X_noisy))
        case "FRED":
            # Train FRED
            # build dataloader and set up FRED
            dataloader = dataloader_from_ndarray_V2(X_noisy,flow_noisy,labels,batch_size=256)
            MFE = ManifoldFlowEmbedder(
                        embedding_dimension=2,
                        embedder_shape=[3, 4, 8, 8, 8, 4, 2],
                        device=device,
                        sigma=0.5,
                        flow_strength=0.5,
                    )
            loss_weights = {
                        "distance regularization": 100,
                        "contrastive loss v2": 1,
                        "smoothness": 0,
                    }
            visualization_functions = []
            FREDtrainer = Trainer(FE = MFE, 
                    loss_weights=loss_weights, 
                    device=device, 
                    title="Noisy Embedding", 
                    visualization_functions=visualization_functions, 
                    data_type="Contrastive Flow",
                    scheduler=None,
                    learning_rate=1e-3)
            FREDtrainer.fit(dataloader, n_epochs = 1000)
            FREDtrainer.visualize_embedding()
            fred_coords = FREDtrainer.embedded_points
        case _:
            raise NotImplementedError("Must Specify UMAP or FRED")
    # Run metrics on points
    


In [None]:
# Scratch: preview the boolean mask produced by thresholding one uniform draw per flow row at 0.1.
torch.gt(torch.rand(len(flow)), 0.1)

tensor([True, True, True,  ..., True, True, True])

In [None]:
# Scratch check of the flow-reversal step. It works on a copy so the loaded
# `flow` stays intact and re-running this cell does not compound sign flips.
# `< 0.1` reverses roughly 10% of the rows; `> 0.1` would reverse roughly 90%.
flow_flipped = np.copy(flow)
flow_flipped[(torch.rand(len(flow)) < 0.1).numpy()] *= -1

In [None]:
# Scratch: display the flow rows whose uniform draw exceeds 0.1.
flow[torch.gt(torch.rand(len(flow)), 0.1)]

array([[ 0.0312342 ,  0.01645556, -0.0887748 , ..., -0.01706855,
        -0.00670738, -0.01429297],
       [ 0.02287296, -0.00859926, -0.07289554, ..., -0.0130601 ,
        -0.00387765, -0.00150005],
       [ 0.00143617,  0.00117963,  0.01831046, ..., -0.00938447,
         0.0002142 ,  0.00056758],
       ...,
       [ 0.02528729,  0.01144369, -0.01577021, ..., -0.01627857,
        -0.00110882, -0.00568943],
       [ 0.03875643, -0.03800944, -0.01107999, ...,  0.00150418,
         0.00441471,  0.00549325],
       [ 0.05843243, -0.08093039,  0.09355683, ..., -0.00656454,
         0.05342222, -0.01849617]])