## Run TFvelo

In [1]:
# Third-party stack used by the notebook: AnnData/scanpy for the data
# container and preprocessing, TFvelo for the velocity model itself.
import pandas as pd
import anndata as ad
import scanpy as sc
import TFvelo as TFv

import numpy as np

import scvelo as scv
import matplotlib

# matplotlib.use('AGG')
import os, sys
import scipy

# Print floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)
from sklearn.preprocessing import MinMaxScaler

# Shared scaler instance; it is not referenced by the cells visible in this notebook section.
scaler = MinMaxScaler()

# from _calculation import get_gams
# Extend the import path two directory levels up (relative to the working
# directory) so that the project-local `paths` module can be imported.
# NOTE(review): presumably "../.." is the repository root -- confirm.
sys.path.append("../..")
from paths import DATA_DIR, FIG_DIR

## Load the dataset and preprocess it according to the input requirements of TFvelo

In [2]:
# Load the cell-cycle dataset and use the spliced counts as the working matrix.
adata = sc.read("RegVelo_datasets/cell cycle/adata.h5ad")
adata.X = adata.layers["spliced"].copy()

# Remove layers that are not used anywhere below.
for unused_layer in ("ambiguous", "matrix", "spanning"):
    del adata.layers[unused_layer]

# Remove per-cell metadata columns that are not needed for the TFvelo run.
adata.obs.drop(
    columns=[
        "Well_Plate",
        "plate",
        "MeanGreen530",
        "MeanRed585",
        "initial_size_unspliced",
        "initial_size_spliced",
        "initial_size",
    ],
    inplace=True,
)

# Index genes by the "name" column, then clear every remaining var column.
adata.var_names = adata.var["name"].values
adata.var.drop(columns=adata.var.columns, inplace=True)

# Discretise `fucci_time` into 5 pseudo-clusters: equal-width bins (pd.cut)
# stored as string categories, and equal-frequency bins (pd.qcut) stored both
# as integer codes and as floats.
adata.obs["pseudo_clusters"] = pd.cut(adata.obs["fucci_time"], bins=5, labels=False).astype(str).astype("category")
adata.obs["pseudo_clusters_equal_size"] = pd.qcut(adata.obs["fucci_time"], q=5, labels=False)
adata.obs["pseudo_clusters_equal_size_num"] = adata.obs["pseudo_clusters_equal_size"].astype(float)

# Rescale `fucci_time` by its maximum so that the maximum maps to 2*pi.
adata.obs["cell_cycle_rad"] = adata.obs["fucci_time"] / adata.obs["fucci_time"].max() * 2 * np.pi

# Keep the full gene list as it was before any filtering.
adata.uns["genes_all"] = np.array(adata.var_names)

# Total abundance = spliced + unspliced. The "spliced" layer is guaranteed to
# exist here (it was already read above), so no fallback branches are needed.
# `.toarray()` yields a plain ndarray; `.todense()` would yield the legacy
# `np.matrix` type, which NumPy no longer recommends.
adata.layers["total"] = adata.layers["spliced"].toarray() + adata.layers["unspliced"].toarray()
adata.layers["total_raw"] = adata.layers["total"].copy()

# Filtering thresholds are 1/50 of the pre-filter matrix dimensions; both are
# computed before either filter runs.
n_cells, n_genes = adata.X.shape
sc.pp.filter_genes(adata, min_cells=n_cells // 50)
sc.pp.filter_cells(adata, min_genes=n_genes // 50)

# Per the logged output, this normalises X/spliced/unspliced/total, keeps the
# 2000 highly variable genes and log-transforms X.
TFv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000, log=True)

# From here on, X holds the processed total-abundance layer.
adata.X = adata.layers["total"].copy()

Normalized count data: X, spliced, unspliced, total.
Extracted 2000 highly variable genes.
Logarithmized X.


## Run TFvelo with default settings and save the output

In [3]:
# Upper-case all gene symbols, then make gene and cell names unique.
# NOTE(review): presumably the upper-casing matches the TF database naming -- confirm.
gene_names = [symbol.upper() for symbol in adata.var_names]
adata.var_names = gene_names
adata.var_names_make_unique()
adata.obs_names_make_unique()

# Neighbour graph and moments of the total abundances, using 30 PCs.
TFv.pp.moments(adata, n_pcs=30)

# Look up the TFs for each gene in the "ENCODE ChEA" databases and record
# the gene list that survived preprocessing.
TFv.pp.get_TFs(adata, databases="ENCODE ChEA")
adata.uns["genes_pp"] = np.array(adata.var_names)

# Options for the TFvelo dynamics fit, gathered in one place.
recover_dynamics_kwargs = dict(
    n_jobs=16,
    max_iter=20,
    var_names="all",
    WX_method="lsq_linear",
    WX_thres=20,
    n_top_genes=2000,
    fit_scaling=True,
    use_raw=0,
    init_weight_method="correlation",
    n_time_points=1000,
)
TFv.tl.recover_dynamics(adata, **recover_dynamics_kwargs)

computing neighbors
    finished (0:00:08) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:00) --> added 
    'M_total', moments of total abundances (adata.layers)
Get TFs according to ENCODE ChEA
max_n_TF: 31
mean_n_TF: 14.682
gene num of 0 TF: 70
total num of TFs: 47
recovering dynamics (using 16/48 cores)


  0%|          | 0/2000 [00:00<?, ?gene/s]

Processing 0/125 TDRD5
0/125 TDRD5 FINISHED with n_TFs: 9
Processing 1/125 KLHDC8A
1/125 KLHDC8A FINISHED with n_TFs: 14
Processing 2/125 IL24
2/125 IL24 FINISHED with n_TFs: 13
Processing 3/125 KCNF1
3/125 KCNF1 FINISHED with n_TFs: 13
Processing 4/125 ACTG2
4/125 ACTG2 FINISHED with n_TFs: 5
Processing 5/125 XIRP2
5/125 XIRP2 FINISHED with n_TFs: 7
Processing 6/125 IVL
6/125 IVL FINISHED with n_TFs: 14
Processing 7/125 SPRR2D
7/125 SPRR2D FINISHED with n_TFs: 9
Processing 8/125 ARHGAP25
8/125 ARHGAP25 FINISHED with n_TFs: 15
Processing 9/125 S100A9
9/125 S100A9 FINISHED with n_TFs: 9
Processing 10/125 TGFA
10/125 TGFA FINISHED with n_TFs: 20
Processing 11/125 GABRG1
11/125 GABRG1 FINISHED with n_TFs: 5
Processing 12/125 ANTXR2
12/125 ANTXR2 FINISHED with n_TFs: 21
Processing 13/125 PBXIP1
13/125 PBXIP1 FINISHED with n_TFs: 21
Processing 14/125 CLDN1
14/125 CLDN1 FINISHED with n_TFs: 16
Processing 15/125 COL6A3
15/125 COL6A3 FINISHED with n_TFs: 4
Processing 16/125 TMEM169
16/125 TMEM

True

In [4]:
# Best (minimum) loss per gene. NaN losses are replaced by a large sentinel
# so they can never be selected as the minimum.
losses = adata.varm["loss"].copy()
losses[np.isnan(losses)] = 1e6
adata.var["min_loss"] = losses.min(1)

# Refresh n_cells: the value computed during preprocessing predates the cell filter.
n_cells = adata.shape[0]

# Divide the fitted velocities gene-wise by the per-gene y-scaling factor.
# A (1, n_genes) row broadcasts across all cells, giving the same result as
# materialising a full (n_cells, n_genes) copy of the scaling vector.
expanded_scaling_y = np.expand_dims(np.array(adata.var["fit_scaling_y"]), 0)
adata.layers["velocity"] = adata.layers["velo_hat"] / expanded_scaling_y

# Create the output directory first so the write cannot fail on a missing
# folder after the long-running fit. Assumes DATA_DIR is a pathlib.Path
# (it is already combined with `/` here).
output_dir = DATA_DIR / "cell_cycle"
output_dir.mkdir(parents=True, exist_ok=True)
adata.write(output_dir / "TFvelo_cellcycle.h5ad")