In [1]:
import scanpy as sc
import scvelo as scv
import os
import sys
import glob
import pandas as pd
import math
import matplotlib.pyplot as plt
import anndata as ad
import numpy as np
import seaborn as sns
import celldancer as cd
import celldancer.simulation as cdsim
import celldancer.utilities as cdutil
import celldancer.cdplt as cdplt
from celldancer.cdplt import colormap
from celldancer.utilities import export_velocity_to_dynamo

from scipy.spatial.distance import cosine
from sklearn.metrics.pairwise import cosine_similarity


# Seed NumPy's global random number generator so NumPy-based sampling in this
# session is repeatable.
# NOTE(review): only NumPy is seeded here; whether the RNGs used inside the
# velocity model are covered by this is not visible in this notebook — confirm.
SEED = 2024
np.random.seed(SEED)


In [2]:
# Load the AnnData object from disk and print its summary (obs/var/uns fields).
# NOTE(review): the path is relative to the notebook's working directory.
adata = sc.read_h5ad("LSK_lineage.h5ad")
print(adata)

AnnData object with n_obs × n_vars = 3186 × 2000
    obs: 'nCount_RNA', 'nFeature_RNA', 'percent.mt', 'nCount_SCT', 'nFeature_SCT', 'sample', 'S.Score', 'G2M.Score', 'Phase', 'integrated_snn_res.0.5', 'seurat_clusters', 'palantir_clusters', 'mono1', 'neu2', 'dc3', 'baso4', 'ery5', 'eos6', 'mep7', 'gmp8', 'cell_type', 'integrated_snn_res.0.4', 'integrated_snn_res.2', 'cell_type2', 'DF_score', 'DF_class', 'orig.lib', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'celltype', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes'
    uns: 'cell1_list', 'cell2_list_exp', 'cell_type2_co

In [3]:
# Flatten `adata` into a long table (one row per gene/cell pair) and save it as
# CSV; the returned frame is displayed as the cell output.
# 'Mu'/'Ms' are passed as the unspliced/spliced sources (presumably layers of
# `adata` — TODO confirm). Cluster labels come from the `celltype` obs column
# (`adata.obs` also carries a separate `cell_type` column, which is not used
# here) and the 2-D coordinates from `X_umap`.
cdutil.adata_to_df_with_embed(
    adata,
    us_para=['Mu', 'Ms'],
    cell_type_para='celltype',
    embed_para='X_umap',
    save_path='celldancer_input.csv',
)

100%|██████████| 2000/2000 [00:49<00:00, 40.10it/s]


Unnamed: 0,gene_name,unsplice,splice,cellID,clusters,embedding1,embedding2
0,Ppp1r42,0.017068,0.000000,d2_5-CTGGACGCAGTCGGAA-1-rna,Other,2.654214,-2.985379
1,Ppp1r42,0.878685,0.160646,d5r1-TCGCTCAAGGGTGAAA-2-rna,Neu,3.380262,5.017480
2,Ppp1r42,0.000000,0.000000,d5r2-TCATTCAAGCCGATTT-3-rna,Mono,-3.991801,4.837319
3,Ppp1r42,0.047938,0.000000,d5r1-CTGCATCGTGCATCTA-2-rna,Neu,2.414526,1.458959
4,Ppp1r42,0.019506,0.000000,d5r2-TGGTTAGAGGGACAGG-3-rna,Neu,3.349718,1.553082
...,...,...,...,...,...,...,...
6371995,mt-Nd6,0.000000,0.000000,d5r2-GGATGTTCATCGTGGC-1-rna,Other,3.649732,-2.549206
6371996,mt-Nd6,0.000000,0.000000,d2_5-CACTGGGAGATTGCGG-1-rna,Other,3.789565,-2.309722
6371997,mt-Nd6,0.000000,0.000000,d5r2-TCCTCGAAGCCGAACA-4-rna,Neu,3.009244,1.417159
6371998,mt-Nd6,0.000000,0.000000,d5r1-CAATACGCACGCCACA-2-rna,Neu,3.087342,1.511211


In [4]:
# Reload the exported long table and run cellDancer's velocity estimation on it.
# The call returns a loss table and the per-gene estimation table.
df = pd.read_csv('celldancer_input.csv')
loss_df, cellDancer_df = cd.velocity(
    df,
    n_jobs=15,       # genes are arranged into portions for parallel jobs
    speed_up=False,
)

Using /media/liyaru/LYR/Diff_change/7_lineage_tracing_multitag/cellDancer_velocity_2024-06-03 22-56-59 as the output path.
Arranging genes for parallel job.
2000  genes were arranged to  134  portions.


Not predicted gene list:['Gdap1', 'Khdc1a', 'Gpr45', 'Cavin2', 'Ankar', 'Inha', '5730419F03Rik', '1700067G17Rik', 'Olfr12', 'Gm29480', 'Gm28086', 'Gm28187', 'Lypd1', 'Acmsd', 'C4bp', 'Gm15848', 'Slc45a3', 'Shisa4', 'Tnni1', 'Lad1', 'Pkp1', 'Lhx9', 'Gm47985', 'Ackr1', 'Lefty2', 'Esrrg', 'Prox1', 'Fam71a', 'G0s2', 'Bmyc', 'Cutal', 'Rprm', 'Gm13580', 'Gm19426', 'Prg2', 'Prg3', 'Olfr1258', 'Agbl2', 'Alx4', 'Gm10800', 'Gm13872', 'Kcna4', 'Ano3', 'Thbd', 'Gm14161', 'Foxs1', 'Gm45609', 'Mafb', 'Hnf4a', 'Mmp9', 'Cyp24a1', 'Sox18', '6030498E09Rik', 'Dcaf12l2', 'Dcaf12l1', 'Ccdc160', 'Rtl8c', 'Gm648', '4930550L24Rik', '1700010D01Rik', 'Dgat2l6', 'Cited1', 'Nxf7', 'Gprasp2', 'Bex2', 'Tceal5', 'Bex1', 'Tceal1', 'Kcne1l', 'Gm15104', 'Adgrg2', 'Gm15241', 'Gm15262', 'Grpr', 'Gm15245', 'Gm16685', 'Bhlhe22', '1700017M07Rik', 'Mccc1os', 'Ccdc144b', 'Pcdh10', 'Postn', 'Gm26671', 'Clrn1', 'Ptx3', '4930535E02Rik', 'Serpini2', 'Gm6525', 'Kirrel', 'Crabp2', 'Gm35439', 'Ivl', 'Hist2h2ac', 'Ankrd34a', 'Adora3'

In [5]:
# Compare the genes fed into cellDancer with the genes present in its result.
# (Estimates from an earlier run could instead be reloaded from that run's
# saved cellDancer_estimation.csv.)
t1 = df['gene_name'].value_counts()              # row count per input gene
t2 = cellDancer_df['gene_name'].value_counts()   # row count per estimated gene

# Genes present in the input but missing from the estimation result.
t = set(t1.index) - set(t2.index)

In [6]:
# Compute cell-level velocity from the per-gene estimates; the result replaces
# `cellDancer_df`, so re-running this cell feeds it its own previous output.
# NOTE(review): the meaning of `supname` is not visible here — confirm that the
# installed celldancer build accepts it and what it controls.
cellDancer_df = cd.compute_cell_velocity(cellDancer_df=cellDancer_df, supname="LSK")

<class 'numpy.ndarray'>
[[0.00000000e+00 2.73378850e-03 2.44733969e-03 ... 1.95462190e-04
  8.92503790e-03 1.90696090e-04]
 [5.89944681e-09 0.00000000e+00 7.02218234e-09 ... 3.46166154e-04
  8.96764514e-05 4.98491215e-04]
 [2.18215330e-07 9.71899440e-03 0.00000000e+00 ... 2.80906033e-05
  2.36393542e-02 2.78147149e-05]
 ...
 [0.00000000e+00 1.18847883e-13 4.77697895e-14 ... 0.00000000e+00
  1.32936241e-13 0.00000000e+00]
 [0.00000000e+00 6.58984920e-06 1.98988897e-08 ... 2.92700287e-05
  0.00000000e+00 0.00000000e+00]
 [5.21443528e-14 1.18477947e-13 5.81571155e-14 ... 7.00664390e-14
  1.31973230e-13 0.00000000e+00]]
(812, 812)
Other_gene


In [7]:
# Checkpoint the cell-velocity table to disk.
# index=False keeps the pandas row index out of the file, so reloading it with
# a plain pd.read_csv does not add a spurious 'Unnamed: 0' column.
cellDancer_df.to_csv('celldancer_result.csv', index=False)

In [8]:
# Reload the checkpointed velocity table, export it together with `adata` via
# export_velocity_to_dynamo, and print the resulting AnnData summary.
cellDancer_df = pd.read_csv('celldancer_result.csv')
adata_cd = export_velocity_to_dynamo(cellDancer_df, adata)
print(adata_cd)

# Make sure the output directory exists so the write below does not fail on a
# fresh working directory.
os.makedirs("adata", exist_ok=True)
adata_cd.write_h5ad("adata/cellDancer.h5ad")

AnnData object with n_obs × n_vars = 3186 × 2000
    obs: 'nCount_RNA', 'nFeature_RNA', 'percent.mt', 'nCount_SCT', 'nFeature_SCT', 'sample', 'S.Score', 'G2M.Score', 'Phase', 'integrated_snn_res.0.5', 'seurat_clusters', 'palantir_clusters', 'mono1', 'neu2', 'dc3', 'baso4', 'ery5', 'eos6', 'mep7', 'gmp8', 'cell_type', 'integrated_snn_res.0.4', 'integrated_snn_res.2', 'cell_type2', 'DF_score', 'DF_class', 'orig.lib', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'celltype', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes', 'use_for_dynamics', 'use_for_transition'
    uns: 'cel