In [1]:
import latentvelo as ltv
import numpy as np
import scanpy as sc
import scvelo as scv
import matplotlib.pyplot as plt
import os
import sys
import pandas as pd
import anndata as ad

# Notebook-wide random seed; change this single constant to vary the run.
SEED = 2024

# Seed NumPy's legacy global RNG.
# NOTE(review): the cell output prints "Global seed set to 0", which suggests
# another imported library manages its own seed; this call only covers NumPy —
# confirm whether model training needs to be seeded separately.
np.random.seed(seed=SEED)

Global seed set to 0


In [2]:
# Load the input AnnData object from an .h5ad file in the working directory.
adata = sc.read_h5ad(filename="LSK_lineage.h5ad")

# Show the plain-text summary (dimensions plus obs/var/uns keys) as a sanity check.
print(adata)

AnnData object with n_obs × n_vars = 3186 × 2000
    obs: 'nCount_RNA', 'nFeature_RNA', 'percent.mt', 'nCount_SCT', 'nFeature_SCT', 'sample', 'S.Score', 'G2M.Score', 'Phase', 'integrated_snn_res.0.5', 'seurat_clusters', 'palantir_clusters', 'mono1', 'neu2', 'dc3', 'baso4', 'ery5', 'eos6', 'mep7', 'gmp8', 'cell_type', 'integrated_snn_res.0.4', 'integrated_snn_res.2', 'cell_type2', 'DF_score', 'DF_class', 'orig.lib', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'celltype', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes'
    uns: 'cell1_list', 'cell2_list_exp', 'cell_type2_co

In [3]:
# --- Preprocessing -----------------------------------------------------------
# Run LatentVelo's standard cleaning recipe with its default options and rebind
# `adata` to the returned object. The cell output reports highly-variable-gene
# selection, neighbor computation and Ms/Mu moment computation.
adata = ltv.utils.standard_clean_recipe(adata)

# Flag every remaining gene as a velocity gene (overwrites any existing column).
adata.var['velocity_genes'] = True

spliced_key = 'spliced'
unspliced_key = 'unspliced'

# --- Per-cell size factors ---------------------------------------------------
# Row sums of each layer. `.sum(1)` yields an (n_obs, 1) matrix for sparse
# layers and a 1-D array for dense ones; `np.asarray(...).ravel()` normalizes
# both cases to a flat vector with one value per cell, which is what an `obs`
# column stores.
spliced_library_sizes = np.asarray(adata.layers[spliced_key].sum(1)).ravel()
unspliced_library_sizes = np.asarray(adata.layers[unspliced_key].sum(1)).ravel()

adata.obs['spliced_size_factor'] = spliced_library_sizes
adata.obs['unspliced_size_factor'] = unspliced_library_sizes

# --- Model and training ------------------------------------------------------
# `observed` is the number of genes; take it from the data instead of
# hard-coding 2000 so the model always matches what the recipe retained.
model = ltv.models.VAE(observed = adata.n_vars)

# NOTE(review): the run name "scNT" looks carried over from a different
# dataset (the input here is LSK_lineage) — confirm it is intended.
epochs, val_ae, val_traj = ltv.train(model, adata,name="scNT")


Extracted 2000 highly variable genes.
Choosing top 2000 genes
computing neighbors
    finished (0:00:05) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:00) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
2000 velocity genes used
epoch 0, full loss 275.592, val loss 157.968, recon MSE 3.586, traj MSE 2.117, reg loss -1.964
epoch 1, full loss 154.353, val loss 107.645, recon MSE 2.699, traj MSE 1.745, reg loss -2.174
epoch 2, full loss 119.581, val loss 79.220, recon MSE 2.485, traj MSE 1.669, reg loss -2.444
epoch 3, full loss 89.851, val loss 50.775, recon MSE 2.122, traj MSE 1.436, reg loss -2.659
epoch 4, full loss 59.716, val loss 21.770, recon MSE 2.013, traj MSE 1.322, reg loss -1.713
epoch 5, full loss 28.890, val loss -8.380, recon MSE 1.658, traj MSE 1.185, reg loss -2.717
epoch 6, full loss 1.823, val loss -38.483, recon MSE 1.520, traj MSE 1.16

In [4]:
# Collect results from the trained model. The call returns two objects:
# a latent-space AnnData and the gene-space AnnData, which is rebound to `adata`.
latent_adata, adata = ltv.output_results(
    model,
    adata,
    gene_velocity=True,
    embedding='umap',
)


In [5]:
# Print the summaries of the gene-space and latent-space objects, one after the
# other, to inspect which fields were added by the previous steps.
print(adata, latent_adata, sep="\n")

AnnData object with n_obs × n_vars = 3186 × 2000
    obs: 'nCount_RNA', 'nFeature_RNA', 'percent.mt', 'nCount_SCT', 'nFeature_SCT', 'sample', 'S.Score', 'G2M.Score', 'Phase', 'integrated_snn_res.0.5', 'seurat_clusters', 'palantir_clusters', 'mono1', 'neu2', 'dc3', 'baso4', 'ery5', 'eos6', 'mep7', 'gmp8', 'cell_type', 'integrated_snn_res.0.4', 'integrated_snn_res.2', 'cell_type2', 'DF_score', 'DF_class', 'orig.lib', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'celltype', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'spliced_size_factor', 'unspliced_size_factor', 'batch_id', 'celltype_id', 'root', 'terminal', 'latent_time'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'velocity_gamma', 

In [6]:
# Persist the gene-space AnnData. The target lives in the "adata/" sub-directory,
# which nothing earlier in the notebook creates, so make sure it exists first;
# otherwise a fresh run can fail here on a missing directory.
os.makedirs("adata", exist_ok=True)
adata.write_h5ad("adata/LatentVelo.h5ad")

In [7]:
# Persist the latent-space AnnData next to the notebook (current working directory).
latent_adata.write_h5ad(filename="LatentVelo_latent.h5ad")