# Dataset Processing 
Process the datasets for the MOCA dataset (blood and neuron)

In [None]:
!pip install scvelo

## Further Exploration with Embryonic Dataset - Blood (and Neuron later)

In [14]:
import scanpy as sc
import pandas as pd
from scipy.io import mmread
import numpy as np

"""
Purpose: Verify the matrices and counts for the trajectory chosen
Params: dataset name (blood or neurons)
Return: introns, extrons, var, obs
"""
def verify_trajectory(dataset): 

    #1. Load matrices ( cells x genes)
    print("\nLoading count matrices:")
    exon = mmread(f"{dataset}/exp_exon.mtx").tocsr()
    intron = mmread(f"{dataset}/exp_intron.mtx").tocsr()
    print(f"Spliced: {exon.shape}")
    print(f"Unspliced: {intron.shape}")

    #2. Load cell metadata
    print("\nLoading cell metadata:")
    obs = pd.read_csv(f"{dataset}/obs.csv", index_col=0)
    print(f"Cells: {len(obs):,}")
    print(f"Columns: {list(obs.columns)}")

    #3. Load gene metadata
    print("\nLoading gene metadata:")
    var = pd.read_csv("E9.5_to_E13.5_var.csv", index_col=0) #same gene file across 
    print(f"Genes: {len(var):,}")

    #4. Verify alignment
    print("\nVerify the dimensions dimensions:")
    assert exon.shape[0] == len(obs), f"Cell mismatch: {exon.shape[0]} vs {len(obs)}"
    assert exon.shape[1] == len(var), f"Gene mismatch: {exon.shape[1]} vs {len(var)}"
    print("All dimensions aligned!")

    return intron, exon, var, obs



In [15]:
"""
Purpose: Convert trajectory information into adata
Params: intron, extron, var, obs files
Return: adata object
"""

def convert_trajectory(intron, exon, var, obs):
    # Get the adata file for easy access
    adata = sc.AnnData(X=(exon + intron).tocsr())

    # Add cell metadata
    adata.obs = obs.copy()
    adata.obs_names = obs.index.astype(str)

    # Add gene metadata  
    adata.var = var.copy()
    adata.var_names = var['gene_short_name'].values
    adata.var_names_make_unique()

    # Add spliced/unspliced layers for velocity
    adata.layers['spliced'] = exon
    adata.layers['unspliced'] = intron


    # Get numeric timepoint from 'day' column
    adata.obs['timepoint_str'] = adata.obs['day'].astype(str)
    adata.obs['timepoint'] = (
        adata.obs['day']
        .str.replace('E', '', regex=False)
        .str.replace('b', '', regex=False)  # Handle E8.5b -> 8.5
        .astype(float)
    )

    return adata


"""
Purpose: Summarize and save data
Params: adata object, dataset name
Return: None
"""
def save_adata(adata, dataset):
    # Print final summary
    print("\nFINAL ANNDATA SUMMARY")
    print("-" * 50)
    print(f"Cells: {adata.n_obs:,}")
    print(f"Genes: {adata.n_vars:,}")
    print(f"Layers: {list(adata.layers.keys())}")
    print(f"Obs cols: {list(adata.obs.columns)}")

    print(f"\nTimepoints:")
    print(adata.obs['timepoint'].value_counts().sort_index())

    print(f"\nCell types:")
    print(adata.obs['celltype'].value_counts())

    # Save file
    adata.write(f"{dataset}/{dataset}_data.h5ad")
    print(f"Saved: {dataset}/{dataset}_data.h5ad")


In [16]:
"""
Purpose: create and save file adata file!
Params: dataset name
Return: None
"""
def create_adata(dataset):
    intron, extron, var, obs = verify_trajectory(dataset)
    adata = convert_trajectory(intron, extron, var, obs)
    save_adata(adata, dataset)

In [17]:
# blood files
create_adata('blood')


Loading count matrices:
Spliced: (53268, 24552)
Unspliced: (53268, 24552)

Loading cell metadata:
Cells: 53,268
Columns: ['Anno', 'day', 'celltype', 'sample', 'batch', 'group']

Loading gene metadata:
Genes: 24,552

Verify the dimensions dimensions:
All dimensions aligned!

FINAL ANNDATA SUMMARY
--------------------------------------------------
Cells: 53,268
Genes: 24,552
Layers: ['spliced', 'unspliced']
Obs cols: ['Anno', 'day', 'celltype', 'sample', 'batch', 'group', 'timepoint_str', 'timepoint']

Timepoints:
timepoint
8.5      2877
9.5      3390
10.5     9308
11.5    14930
12.5     9090
13.5    13673
Name: count, dtype: int64

Cell types:
celltype
Definitive erythroid cells    22038
Primitive erythroid cells     21309
White blood cells              8213
Megakaryocytes                 1509
Blood progenitors               199
Name: count, dtype: int64
Saved: blood/blood_data.h5ad


In [6]:
# neuron files
create_adata('neuron')


Loading count matrices:
Spliced: (141576, 24552)
Unspliced: (141576, 24552)

Loading cell metadata:
Cells: 141,576
Columns: ['Anno', 'day', 'celltype', 'sample', 'batch', 'group', 'ID']

Loading gene metadata:
Genes: 24,552

Verify the dimensions dimensions:
All dimensions aligned!

FINAL ANNDATA SUMMARY
--------------------------------------------------
Cells: 141,576
Genes: 24,552
Layers: ['spliced', 'unspliced']
Obs cols: ['Anno', 'day', 'celltype', 'sample', 'batch', 'group', 'ID', 'timepoint_str', 'timepoint']

Timepoints:
timepoint
8.5      5000
9.5      3254
10.5    23205
11.5    39737
12.5    36687
13.5    33693
Name: count, dtype: int64

Cell types:
celltype
Spinal cord excitatory neurons         20000
Neuron progenitor cells                18855
Spinal cord inhibitory neurons         17477
Inhibitory interneurons                16864
Intermediate progenitor cells          16386
Di/mesencephalon excitatory neurons    16258
Di/mesencephalon inhibitory neurons    14571
Motor ne

## Explore Mouse Gastriculation
Taken from the scVelo Package

In [None]:
import scvelo as scv
import scanpy as sc

adata = scv.datasets.gastrulation()

print("OBS columns:")
print(adata.obs.columns.tolist())

print("VAR columns:")
print(adata.var.columns.tolist())

print("Layers:", adata.layers.keys())

adata.X  

In [None]:
# Copy the timepoint from the stage column
adata.obs["timepoint"] = adata.obs["stage"]
print(adata.obs["stage"].unique())
print(adata.obs["timepoint"].unique())

In [None]:
# Save file
adata.write("mouse_data.h5ad")

## Graph Creation
Take adata and convert it to a pytorch graph. Initially do cosine similarity between gene expression for the edge weights. Will re-weight with scVelo RNA velocity down the line. Let timepoints be node features as well as gene expression.

In [None]:
%pip uninstall torch-scatter torch-sparse torch-geometric torch-cluster torch-spline-conv -y
%pip install torch


In [None]:
%%python -c "import torch; print(torch.__version__)"
%pip install --upgrade pip

In [None]:
%pip install torch-geometric
%pip install scanpy

In [None]:
# !pip install "numpy<2"
%pip install scanpy


In [3]:
import torch
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from torch_geometric.data import Data
import scipy.sparse as sp
import scanpy as sc


  from .autonotebook import tqdm as notebook_tqdm


In [11]:
"""
Purpose: create the graph as a pytorch graph and save 
Params: dataset name, adata object, preprocessing params
Return: None
"""

def create_graph(dataset, adata, n_neighbors=15, n_comps=50, n_pcs=50, n_top_genes=2000):

    """Data preprocessing for PCA"""
    # Standard filtering and normalization
    sc.pp.filter_genes(adata, min_cells=10)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, n_top_genes=n_top_genes)
    print(f"Highly variable genes: {adata.var['highly_variable'].sum()}")

    # Prevent densification issue
    adata = adata[:, adata.var["highly_variable"]].copy()
    
    sc.pp.scale(adata, max_value=10)
    sc.tl.pca(adata, n_comps=n_comps)
    expr_pca = adata.obsm['X_pca']  # [n_cells, 50]
    print(f"PCA shape: {expr_pca.shape}")
    
    """Adding Node Features"""
    x = torch.tensor(expr_pca, dtype=torch.float32)
    print(f"Node features: {x.shape[1]} (PCA components)")
    
    # Timepoint as separate attribute
    if adata.obs['timepoint'].dtype == 'object' or adata.obs['timepoint'].dtype.name == 'category':
        # Convert "E8.5" to 8.5
        timepoints = adata.obs['timepoint'].astype(str).str.replace('E', '').astype(float)
    else:
        timepoints = adata.obs['timepoint'].values
    timepoints_norm = (timepoints - timepoints.mean()) / timepoints.std()
    print(f"Timepoints: {np.unique(timepoints)}")
    print(f"Normalized Timepoints: {np.unique(timepoints_norm)}")
    
    """Adding Edges"""
    # Compute neighbors w/ PCA
    print("Calculating the neighbors with PCA")
    sc.pp.neighbors(adata, n_neighbors=n_neighbors, use_rep='X_pca') 
    connectivities = adata.obsp['connectivities']
    rows, cols = connectivities.nonzero()
    
    # cosine similarity on PCA embeddings (vectorized for speed)
    print("Running cosine similarity on neighbors")
    source_embeddings = expr_pca[rows]  # [n_edges, 50]
    target_embeddings = expr_pca[cols]  # [n_edges, 50]
    
    # Compute all similarities at once
    edge_weights = np.sum(source_embeddings * target_embeddings, axis=1) / (
        np.linalg.norm(source_embeddings, axis=1) * np.linalg.norm(target_embeddings, axis=1) + 1e-8
    )
    
    edge_index = torch.tensor(np.vstack([rows, cols]), dtype=torch.long)
    edge_attr = torch.tensor(edge_weights, dtype=torch.float32).unsqueeze(1)
    
    print(f"Edges: {edge_index.shape[1]:,}")
    print(f"Edge weights: [{edge_weights.min():.3f}, {edge_weights.max():.3f}]")
    
    """Adding Metadata (Cell Labels)"""
    celltype_cat = adata.obs['celltype'].astype('category')
    celltype_codes = torch.tensor(celltype_cat.cat.codes.values, dtype=torch.long)
    celltype_names = list(celltype_cat.cat.categories)
    print(f"Cell types: {len(celltype_names)}")
    
    """Create Graph"""
    graph = Data(
        x=x,                              # [n_cells, 50] PCA embeddings
        edge_index=edge_index,            # [2, n_edges]
        edge_attr=edge_attr,              # [n_edges, 1] cosine similarity
        timepoint=timepoints,             # [n_cells] raw timepoint
        timepoint_norm=timepoints_norm,   # [n_cells] normalized timepoint
        celltype=celltype_codes,          # [n_cells] integer cell type labels
    )
    
    # Store metadata (not tensors)
    graph.celltype_names = celltype_names
    graph.n_cells = adata.n_obs
    graph.n_pcs = n_comps
    
    print("Graph created:", graph)
    
    # Save graph as pytorch graph and rewrite adata
    torch.save(graph, f"{dataset}/{dataset}_graph.pt")
    print(f"Saved graph to {dataset}/{dataset}_graph.pt")
    
    adata.write(f"{dataset}/{dataset}_data.h5ad")
    print(f"Updated .h5ad file {dataset}/{dataset}_data.h5ad")


In [18]:
# Load the adata BLOOD
adata = sc.read_h5ad("blood/blood_data.h5ad")
print(f"Loaded: {adata.n_obs:,} cells and {adata.n_vars:,} genes")

create_graph('blood',adata, n_neighbors=15, n_comps=50, n_pcs=50, n_top_genes=2000)

Loaded: 53,268 cells and 24,552 genes
Highly variable genes: 2000


  return dispatch(args[0].__class__)(*args, **kw)


PCA shape: (53268, 50)
Node features: 50 (PCA components)
Timepoints: [ 8.5  9.5 10.5 11.5 12.5 13.5]
Normalized Timepoints: [-2.24107041 -1.54507785 -0.8490853  -0.15309275  0.5428998   1.23889235]
Calculating the neighbors with PCA
Running cosine similarity on neighbors
Edges: 1,343,922
Edge weights: [0.127, 0.989]
Cell types: 5
Graph created: Data(x=[53268, 50], edge_index=[2, 1343922], edge_attr=[1343922, 1], timepoint=[53268], timepoint_norm=[53268], celltype=[53268], celltype_names=[5], n_cells=53268, n_pcs=50)
Saved graph to blood/blood_graph.pt
Updated .h5ad file blood/blood_data.h5ad


In [12]:
# Load the adata NEURON
adata = sc.read_h5ad("neuron/neuron_data.h5ad")
print(f"Loaded: {adata.n_obs:,} cells and {adata.n_vars:,} genes")

create_graph('neuron',adata, n_neighbors=15, n_comps=50, n_pcs=50, n_top_genes=2000)

Loaded: 141,576 cells and 24,552 genes
Highly variable genes: 2000


  return dispatch(args[0].__class__)(*args, **kw)


PCA shape: (141576, 50)
Node features: 50 (PCA components)
Timepoints: [ 8.5  9.5 10.5 11.5 12.5 13.5]
Normalized Timepoints: [-2.69563266 -1.90727003 -1.1189074  -0.33054477  0.45781787  1.2461805 ]
Calculating the neighbors with PCA
Running cosine similarity on neighbors
Edges: 3,294,318
Edge weights: [0.112, 0.983]
Cell types: 10
Graph created: Data(x=[141576, 50], edge_index=[2, 3294318], edge_attr=[3294318, 1], timepoint=[141576], timepoint_norm=[141576], celltype=[141576], celltype_names=[10], n_cells=141576, n_pcs=50)
Saved graph to neuron/neuron_graph.pt
Updated .h5ad file neuron/neuron_data.h5ad


In [9]:
# Load the adata for the mouse dataset
adata = sc.read_h5ad("mouse/mouse_data.h5ad")
print(f"Loaded: {adata.n_obs:,} cells and {adata.n_vars:,} genes")

create_graph('mouse', adata, n_neighbors=15, n_comps=50, n_pcs=50, n_top_genes=2000)

Loaded: 89,267 cells and 53,801 genes
Highly variable genes: 2000


  return dispatch(args[0].__class__)(*args, **kw)


PCA shape: (89267, 50)
Node features: 50 (PCA components)
Timepoints: [6.5  7.5  6.75 7.75 7.   8.   8.5  7.25 8.25]
Normalized Timepoints: [-2.25657996 -0.49925956 -1.81724986 -0.05992945 -1.37791976  0.37940065
  1.25806085 -0.93858966  0.81873075]
Calculating the neighbors with PCA
Running cosine similarity on neighbors
Edges: 1,961,242
Edge weights: [0.223, 0.998]
Cell types: 34
Graph created: Data(x=[89267, 50], edge_index=[2, 1961242], edge_attr=[1961242, 1], timepoint=index
cell_1         6.5
cell_2         6.5
cell_6         6.5
cell_8         6.5
cell_9         6.5
              ... 
cell_139326    8.5
cell_139327    8.5
cell_139329    8.5
cell_139330    8.5
cell_139331    8.5
Name: timepoint, Length: 89267, dtype: float64, timepoint_norm=index
cell_1        -2.256580
cell_2        -2.256580
cell_6        -2.256580
cell_8        -2.256580
cell_9        -2.256580
                 ...   
cell_139326    1.258061
cell_139327    1.258061
cell_139329    1.258061
cell_139330    1.258

## Reweight the edges
Use scVelo and the equation cosine_w + alpha * cosine_similarity(distance between cells, RNA velocity). This should downweight cells not in the direction of the RNA velocity, while upweighting those that are closer

In [None]:
%pip install scvelo

In [21]:
import scvelo as scv
import scanpy as sc
import numpy as np
import torch

"""
Purpose: Compute RNA velocity using scVelo
Params: adata object, preprocessing parameters
Return: adata with velocity computed
"""
def compute_velocity(adata, min_shared_counts=20, n_top_genes=2000, n_pcs=50, n_neighbors=15):
    print("COMPUTING RNA VELOCITY")
    
    # scVelo preprocessing
    print("\nPreprocessing for velocity!")
    scv.pp.filter_and_normalize(adata, min_shared_counts=min_shared_counts, n_top_genes=n_top_genes)
    scv.pp.moments(adata, n_pcs=n_pcs, n_neighbors=n_neighbors)
    
    # Compute velocity
    print("\nComputing velocity!")
    scv.tl.velocity(adata, mode='stochastic')
    
    # Build velocity graph
    print("\nBuilding velocity graph!")
    scv.tl.velocity_graph(adata)
    
    # Get the velocity in gene space
    velocity_genes = adata.layers['velocity']
    
    # Get the PCA loadings (genes x PCs)
    pca_loadings = adata.varm['PCs']  # [n_genes, n_pcs]
    
    # Project: velocity_pca = velocity_genes @ pca_loadings
    # Handle sparse matrix if needed
    if hasattr(velocity_genes, 'toarray'):
        velocity_genes = velocity_genes.toarray()
    
    # Replace NaNs with 0 (some genes have no velocity)
    velocity_genes = np.nan_to_num(velocity_genes, nan=0.0)
    
    velocity_pca = velocity_genes @ pca_loadings
    adata.obsm['velocity_pca'] = velocity_pca
    
    print(f"\nVelocity computed!")
    print(f"  Velocity (genes): {adata.layers['velocity'].shape}")
    print(f"  Velocity (PCA):   {velocity_pca.shape}")
    
    return adata


"""
Purpose: Reweight graph edges using velocity alignment
Params: graph object, adata with velocity, alpha hyperparameter
Return: graph with updated edge weights
"""
def reweight_edges(graph, adata, alpha=0.5):
    print("REWEIGHTING EDGES WITH VELOCITY")
    print("-" * 50)
    
    # Get velocity in PCA space
    velocity_pca = adata.obsm['velocity_pca']
    
    # Get graph data as numpy
    x = graph.x.numpy()
    edge_index = graph.edge_index.numpy()
    w_cosine = graph.edge_attr.numpy().flatten()
    
    print(f"\nAlpha: {alpha}")
    print(f"Edges: {edge_index.shape[1]:,}")
    print("Computing velocity alignment")
    
    rows = edge_index[0]
    cols = edge_index[1]
    alignments = np.zeros(edge_index.shape[1])
    
    v_i = velocity_pca[rows]             # [E, d]
    d_ij = x[cols] - x[rows]             # [E, d]

    # dot products for ALL edges
    dots = np.sum(v_i * d_ij, axis=1)

    # norms
    norm_v = np.linalg.norm(v_i, axis=1)
    norm_d = np.linalg.norm(d_ij, axis=1)

    # cosine between velocity and displacement
    alignments = dots / (norm_v * norm_d + 1e-8)
    
    # Compute new weights
    w_new = (1 - alpha) * w_cosine + alpha * alignments
    
    # Print stats
    print(f"\nAlignment stats:")
    print(f"  Range: [{alignments.min():.3f}, {alignments.max():.3f}]")
    print(f"  Mean:  {alignments.mean():.3f}")
    
    print(f"\nEdge weight stats:")
    print(f"  Before: [{w_cosine.min():.3f}, {w_cosine.max():.3f}]")
    print(f"  After:  [{w_new.min():.3f}, {w_new.max():.3f}]")
    
    # Update graph
    graph.edge_attr = torch.tensor(w_new, dtype=torch.float32).unsqueeze(1)
    graph.velocity_alignment = torch.tensor(alignments, dtype=torch.float32)
    
    print(f"\nEdges reweighted!")
    
    return graph


"""
Purpose: Save velocity adata and reweighted graph
Params: adata, graph, dataset name
Return: None
"""
def save_velocity_data(adata, graph, dataset):
    print("\nSAVING NOW")
    
    adata.write(f"{dataset}/{dataset}_velocity_data.h5ad")
    print(f"Saved: {dataset}/{dataset}_velocity_data.h5ad")
    
    torch.save(graph, f"{dataset}/{dataset}_graph_velocity.pt")
    print(f"Saved: {dataset}/{dataset}_graph_velocity.pt")


"""
Purpose: Full pipeline to compute velocity and reweight graph
Params: adata, dataset name, alpha, preprocessing params
Return: adata, graph (both updated)
"""
def velocity_pipeline(adata, dataset, alpha=0.5, min_shared_counts=20, n_top_genes=2000, n_pcs=50, n_neighbors=15):
    # Load existing graph
    graph = torch.load(f"{dataset}/{dataset}_graph.pt", weights_only=False)
    print(f"Loaded graph: {graph.x.shape[0]:,} nodes, {graph.edge_index.shape[1]:,} edges")
    
    # Compute velocity
    adata = compute_velocity(
        adata,
        min_shared_counts=min_shared_counts,
        n_top_genes=n_top_genes,
        n_pcs=n_pcs,
        n_neighbors=n_neighbors
    )
    
    # Reweight edges
    graph = reweight_edges(graph, adata, alpha=alpha)
    
    # Save
    save_velocity_data(adata, graph, dataset)
    
    return adata, graph


In [25]:
# Load the adata (blood)
adata = sc.read_h5ad("blood/blood_data.h5ad")
new_adata, new_graph = velocity_pipeline(adata, 'blood', alpha=0.7)

Loaded graph: 53,268 nodes, 1,343,922 edges
COMPUTING RNA VELOCITY

Preprocessing for velocity!
Filtered out 1179 genes that are detected 20 counts (shared).
Normalized count data: spliced, unspliced.
Skip filtering by dispersion since number of variables are less than `n_top_genes`.


  log1p(adata)


Logarithmized X.
computing moments based on connectivities
    finished (0:00:00) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)

Computing velocity!
computing velocities


  gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i]))


    finished (0:00:03) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)

Building velocity graph!
computing velocity graph (using 1/8 cores)
    finished (0:00:31) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)

Velocity computed!
  Velocity (genes): (53268, 821)
  Velocity (PCA):   (53268, 50)
REWEIGHTING EDGES WITH VELOCITY
--------------------------------------------------

Alpha: 0.7
Edges: 1,343,922
Computing velocity alignment

Alignment stats:
  Range: [-0.820, 0.792]
  Mean:  -0.063

Edge weight stats:
  Before: [0.127, 0.989]
  After:  [-0.402, 0.829]

Edges reweighted!

SAVING NOW
Saved: blood/blood_velocity_data.h5ad
Saved: blood/blood_graph_velocity.pt


In [26]:
# Load the adata (neuron)
adata = sc.read_h5ad("neuron/neuron_data.h5ad")
new_adata, new_graph = velocity_pipeline(adata, 'neuron', alpha=0.7)

Loaded graph: 141,576 nodes, 3,294,318 edges
COMPUTING RNA VELOCITY

Preprocessing for velocity!
Filtered out 1006 genes that are detected 20 counts (shared).
Normalized count data: spliced, unspliced.
Skip filtering by dispersion since number of variables are less than `n_top_genes`.


  log1p(adata)


Logarithmized X.
computing moments based on connectivities
    finished (0:00:03) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)

Computing velocity!
computing velocities


  gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i]))


    finished (0:00:13) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)

Building velocity graph!
computing velocity graph (using 1/8 cores)
    finished (0:01:36) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)

Velocity computed!
  Velocity (genes): (141576, 994)
  Velocity (PCA):   (141576, 50)
REWEIGHTING EDGES WITH VELOCITY
--------------------------------------------------

Alpha: 0.7
Edges: 3,294,318
Computing velocity alignment

Alignment stats:
  Range: [-0.814, 0.780]
  Mean:  -0.071

Edge weight stats:
  Before: [0.112, 0.983]
  After:  [-0.402, 0.818]

Edges reweighted!

SAVING NOW
Saved: neuron/neuron_velocity_data.h5ad
Saved: neuron/neuron_graph_velocity.pt


In [27]:
# Load the adata (mouse)
adata = sc.read_h5ad("mouse/mouse_data.h5ad")
new_adata, new_graph = velocity_pipeline(adata, 'mouse', alpha=0.7)

Loaded graph: 89,267 nodes, 1,961,242 edges
COMPUTING RNA VELOCITY

Preprocessing for velocity!
Filtered out 864 genes that are detected 20 counts (shared).
Normalized count data: spliced, unspliced.
Skip filtering by dispersion since number of variables are less than `n_top_genes`.


  log1p(adata)


Logarithmized X.
computing moments based on connectivities
    finished (0:00:02) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)

Computing velocity!
computing velocities


  gamma[i] = np.linalg.pinv(A.T.dot(A)).dot(A.T.dot(y[:, i]))


    finished (0:00:13) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)

Building velocity graph!
computing velocity graph (using 1/8 cores)
    finished (0:01:25) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)

Velocity computed!
  Velocity (genes): (89267, 1136)
  Velocity (PCA):   (89267, 50)
REWEIGHTING EDGES WITH VELOCITY
--------------------------------------------------

Alpha: 0.7
Edges: 1,961,242
Computing velocity alignment

Alignment stats:
  Range: [-0.764, 0.783]
  Mean:  0.029

Edge weight stats:
  Before: [0.223, 0.998]
  After:  [-0.240, 0.840]

Edges reweighted!

SAVING NOW
Saved: mouse/mouse_velocity_data.h5ad
Saved: mouse/mouse_graph_velocity.pt
