In [1]:
import copy
import os
import sys

import anndata as ad
import numpy as np
import pandas as pd
import scanpy as sc
import scvelo as scv
import seaborn as sns
import torch
from scipy.stats import mannwhitneyu
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from umap import UMAP

from deepvelo import train, Constants
from deepvelo.utils import (
    velocity,
    velocity_confidence,
    continuity_confidence,
    update_dict,
    cross_boundary_correctness,
)
from deepvelo.utils.plot import statplot, compare_plot
from deepvelo.utils.preprocess import autoset_coeff_s
from deepvelo.utils.scatter import scatter

# Single seed shared by every random number generator used in this notebook.
SEED = 2024

# Seed NumPy's global (legacy) generator and PyTorch's generator with the same value.
np.random.seed(SEED)
torch.manual_seed(SEED)

# Ask cuDNN for deterministic kernels and switch off its benchmark-based
# algorithm auto-selection.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

  from .autonotebook import tqdm as notebook_tqdm
Using backend: pytorch


In [2]:
# Load the dataset from an .h5ad file; the path is relative to the notebook's
# working directory.
adata = sc.read_h5ad("LSK_lineage.h5ad")
# Print the AnnData summary (dimensions and the obs/var/uns keys) for reference.
print(adata)

AnnData object with n_obs × n_vars = 3186 × 2000
    obs: 'nCount_RNA', 'nFeature_RNA', 'percent.mt', 'nCount_SCT', 'nFeature_SCT', 'sample', 'S.Score', 'G2M.Score', 'Phase', 'integrated_snn_res.0.5', 'seurat_clusters', 'palantir_clusters', 'mono1', 'neu2', 'dc3', 'baso4', 'ery5', 'eos6', 'mep7', 'gmp8', 'cell_type', 'integrated_snn_res.0.4', 'integrated_snn_res.2', 'cell_type2', 'DF_score', 'DF_class', 'orig.lib', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'celltype', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes'
    uns: 'cell1_list', 'cell2_list_exp', 'cell_type2_co

In [3]:
# Experiment-specific settings; anything not listed here falls back to
# DeepVelo's defaults when the two dicts are merged below.
config_overrides = {
    "name": "DeepVelo",  # name of the experiment
    "n_gpu": 0,  # presumably selects CPU-only training -- TODO confirm
    "loss": {
        "args": {
            # coefficient chosen by DeepVelo's helper from this dataset
            "coeff_s": autoset_coeff_s(adata),
            "inner_batch_size": 100,
        }
    },
    "arch": {"args": {"pred_unspliced": True}},
    "trainer": {"verbosity": 0},  # increase verbosity to show training progress
}

# Merge into a deep copy of the defaults rather than into
# Constants.default_configs itself. update_dict appears to update its first
# argument in place (TODO confirm against deepvelo.utils); merging into the
# shared defaults would leak these overrides into any later experiment run in
# the same kernel and make this cell non-idempotent on re-run.
configs = update_dict(copy.deepcopy(Constants.default_configs), config_overrides)

# initial velocity
# NOTE(review): the initial velocity estimation below is disabled; confirm that
# train() does not rely on it having been run in this session.
# velocity(adata, mask_zero=False)
trainer = train(adata, configs)

The ratio of spliced reads is 84.6% (between 70% and 85%). Suggest using coeff_s 0.75.
building graph


INFO:train:Beginning training of DeepVelo ...


velo data shape: torch.Size([3186, 2000])
confidence mse shape: (3186, 4000)
confidence mse stats: max 313208.4375, min 0.0, mean 3.2620129585266113, std 417.6153564453125
confidence corr shape: (2000,)
confidence corr stats: max 0.9838609099388123, min -0.5351807475090027, mean 0.3936338722705841, std 0.23424574732780457
velo_mat shape: (3186, 2000)
--> added 'velocity_unspliced' (adata.layers)
--> added 'velocity' (adata.layers)
--> added 'cell_specific_beta' (adata.layers)
--> added 'cell_specific_gamma' (adata.layers)
--> added 'cell_specific_alpha' (adata.layers)


In [4]:
# Print the AnnData summary again after training.
print(adata)

# Persist the trained object. The output directory is created explicitly so a
# fresh checkout (without an existing "adata/" folder) does not fail at the
# very last step after training has already completed; creating it is a no-op
# when the directory is already there.
output_path = "adata/DeepVelo_GB.h5ad"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
adata.write_h5ad(output_path)

AnnData object with n_obs × n_vars = 3186 × 2000
    obs: 'nCount_RNA', 'nFeature_RNA', 'percent.mt', 'nCount_SCT', 'nFeature_SCT', 'sample', 'S.Score', 'G2M.Score', 'Phase', 'integrated_snn_res.0.5', 'seurat_clusters', 'palantir_clusters', 'mono1', 'neu2', 'dc3', 'baso4', 'ery5', 'eos6', 'mep7', 'gmp8', 'cell_type', 'integrated_snn_res.0.4', 'integrated_snn_res.2', 'cell_type2', 'DF_score', 'DF_class', 'orig.lib', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'celltype', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes'
    uns: 'cell1_list', 'cell2_list_exp', 'cell_type2_co