# Calculate velocity and latent time using cell2fate

## Library imports

In [1]:
# Import needed packages (standard library first, then third-party, then project-local).
import contextlib
import io
import os
import sys

import cell2fate as c2f
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np
import torch
import anndata as ad, scvelo as scv

# `paths` is a project-local module; it is looked up two directory levels above
# this notebook, so that location has to be on sys.path before the import.
# `sys` must be imported explicitly: it is not available in a fresh kernel.
sys.path.append("../..")
from paths import DATA_DIR, FIG_DIR

Global seed set to 0


## General settings

In [2]:
# Switches controlling whether figures / datasets are written to disk.
SAVE_FIGURES = True
SAVE_DATASETS = True

# Make sure the 'simulation' sub-directory exists for every enabled output kind.
if SAVE_FIGURES:
    os.makedirs(os.path.join(FIG_DIR, 'simulation'), exist_ok=True)
if SAVE_DATASETS:
    os.makedirs(os.path.join(DATA_DIR, 'simulation'), exist_ok=True)

In [3]:
input_path = DATA_DIR
output_path = DATA_DIR / 'simulation'
# os.listdir returns entries in arbitrary order and also lists sub-directories
# (DATA_DIR contains the 'simulation' folder created above). Keep only regular
# .h5ad files and sort them so that input_files[0] is a reproducible choice.
input_files = sorted(
    entry
    for entry in os.listdir(input_path)
    if entry.endswith('.h5ad') and os.path.isfile(os.path.join(input_path, entry))
)

## Function definitions

In [4]:
# Helpers for training a cell2fate model on one dataset and collecting its output.
def _to_dense(matrix):
    """Return ``matrix`` as a dense NumPy array.

    Objects exposing ``toarray()`` (e.g. SciPy sparse containers) are densified
    through it; anything else is copied with ``np.array``. This avoids the
    ``.A`` shorthand, which dense arrays do not provide.
    """
    if hasattr(matrix, 'toarray'):
        return matrix.toarray()
    return np.array(matrix)


def trainc2fmodel(adatafile, input_path, output_path,
                  min_shared_counts=10, n_top_genes=90, cells_per_cluster=100):
    """Train a cell2fate dynamical model on one dataset and attach its velocity.

    Parameters
    ----------
    adatafile : str
        File name of the ``.h5ad`` dataset, relative to ``input_path``. The file
        must provide the layers ``'counts_spliced'`` and ``'counts_unspliced'``.
    input_path : path-like
        Directory containing ``adatafile``.
    output_path : path-like
        Currently unused; kept so existing callers keep working.
    min_shared_counts : int, optional
        Passed to both ``scv.pp.filter_and_normalize`` and
        ``c2f.utils.get_training_data`` (default 10).
    n_top_genes : int, optional
        Number of variable genes requested from both the scVelo filtering step
        and ``c2f.utils.get_training_data`` (default 90).
    cells_per_cluster : int, optional
        ``cells_per_cluster`` argument of ``c2f.utils.get_training_data``
        (default 100).

    Returns
    -------
    AnnData
        The object returned by ``mod.compute_module_summary_statistics`` with
        two extra layers: ``'Spliced Mean'`` and ``'Velocity'``.
    """
    adata = sc.read_h5ad(os.path.join(input_path, adatafile))

    # Dense working copies of the count layers; library sizes are recorded
    # before any normalisation is applied.
    adata.layers['spliced'] = _to_dense(adata.layers['counts_spliced'])
    adata.layers['unspliced'] = _to_dense(adata.layers['counts_unspliced'])
    adata.X = _to_dense(adata.X)
    adata.layers['raw_spliced'] = adata.layers['spliced']
    adata.layers['raw_unspliced'] = adata.layers['unspliced']
    adata.obs['u_lib_size_raw'] = adata.layers['raw_unspliced'].sum(-1)
    adata.obs['s_lib_size_raw'] = adata.layers['raw_spliced'].sum(-1)

    # Gene filtering / normalisation followed by Leiden clustering; the
    # 'leiden' column is the cluster column handed to get_training_data below.
    scv.pp.filter_and_normalize(adata, min_shared_counts=min_shared_counts,
                                n_top_genes=n_top_genes)
    sc.tl.pca(adata)
    sc.pp.neighbors(adata)
    sc.tl.umap(adata)
    sc.tl.leiden(adata)

    # Reset 'spliced' / 'unspliced' from the count layers, replacing the
    # values that went through filter_and_normalize.
    adata.layers['spliced'] = _to_dense(adata.layers['counts_spliced'])
    adata.layers['unspliced'] = _to_dense(adata.layers['counts_unspliced'])

    clusters_to_remove = []  # no clusters are excluded from training
    adata = c2f.utils.get_training_data(adata, cells_per_cluster=cells_per_cluster,
                                        cluster_column='leiden',
                                        remove_clusters=clusters_to_remove,
                                        min_shared_counts=min_shared_counts,
                                        n_var_genes=n_top_genes)

    # Model setup and training.
    c2f.Cell2fate_DynamicalModel.setup_anndata(adata, spliced_label='spliced',
                                               unspliced_label='unspliced')
    n_modules = c2f.utils.get_max_modules(adata)
    mod = c2f.Cell2fate_DynamicalModel(adata, n_modules=n_modules)
    mod.train()

    # Posterior export and module statistics.
    # NOTE(review): the repeated setup_anndata call is kept from the original;
    # it may be redundant after training - confirm against cell2fate.
    c2f.Cell2fate_DynamicalModel.setup_anndata(adata, spliced_label='spliced',
                                               unspliced_label='unspliced')
    # The return value is not needed here; mod.samples is read right after.
    mod.export_posterior(adata)
    adata = mod.compute_module_summary_statistics(adata)

    # Total velocity: beta_g * mu_expression[..., 0] - gamma_g * mu_expression[..., 1].
    post_means = mod.samples['post_sample_means']
    # Index 1 is stored as 'Spliced Mean'; index 0 is presumably the unspliced
    # mean - TODO confirm against the cell2fate model definition.
    mu_unspliced = post_means['mu_expression'][..., 0]
    mu_spliced = post_means['mu_expression'][..., 1]
    with contextlib.redirect_stdout(io.StringIO()):
        adata.layers['Spliced Mean'] = mu_spliced
        c2f_velocity = (torch.tensor(post_means['beta_g']) * mu_unspliced
                        - torch.tensor(post_means['gamma_g']) * mu_spliced)
        adata.layers['Velocity'] = c2f_velocity.numpy()
    return adata

## Data loading and processing of one instance

In [5]:
# Train cell2fate on the first listed input file.
adata = trainc2fmodel(
    adatafile=input_files[0],
    input_path=input_path,
    output_path=output_path,
)

Normalized count data: X, spliced, unspliced.
Extracted 90 highly variable genes.
Keeping at most 100 cells per cluster
Extracted 90 highly variable genes.


No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


Leiden clustering ...
Number of Leiden Clusters: 3
Maximal Number of Modules: 3


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.


Epoch 500/500: 100%|██████████| 500/500 [01:23<00:00,  5.96it/s, v_num=1, elbo_train=7.44e+4]
Leiden clustering ...
Number of Leiden Clusters: 3
Maximal Number of Modules: 3
Sampling local variables, batch: 100%|██████████| 1/1 [00:02<00:00,  2.52s/it]
Sampling global variables, sample: 100%|██████████| 29/29 [00:01<00:00, 22.69it/s]


In [6]:
# Results of interest: the latent time in adata.obs['Time (hours)'] and the
# velocity in adata.layers['Velocity'].
adata

AnnData object with n_obs × n_vars = 293 × 90
    obs: 'step_ix', 'simulation_i', 'sim_time', 'u_lib_size_raw', 's_lib_size_raw', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'leiden', '_indices', '_scvi_batch', 'Time (hours)', 'Time Uncertainty (sd)', 'Module 0 Activation', 'Module 0 State', 'Module 1 Activation', 'Module 1 State', 'Module 2 Activation', 'Module 2 State'
    var: 'module_id', 'basal', 'burn', 'independence', 'color', 'is_tf', 'is_hk', 'transcription_rate', 'splicing_rate', 'translation_rate', 'mrna_halflife', 'protein_halflife', 'mrna_decay_rate', 'protein_decay_rate', 'max_premrna', 'max_mrna', 'max_protein', 'mol_premrna', 'mol_mrna', 'mol_protein', 'means', 'dispersions', 'dispersions_norm', 'highly_variable'
    uns: 'network', 'regulators', 'regulatory_network', 'regulatory_network_regulators', 'regulatory_network_targets', 'skeleton', 'targets', 'traj_dimred_segments', 'traj_milestone_network', 'traj_progressions', 'pca', 'neighb

In [7]:
# Persist the annotated AnnData object into the simulation data directory.
if SAVE_DATASETS:
    adata.write_h5ad(output_path / 'c2f_output.h5ad')