In [1]:
import scanpy as sc
import scvelo as scv
import pandas as pd
import numpy as np
import celldancer as cd
import celldancer.utilities as cdutil

from celldancer.utilities import export_velocity_to_dynamo

# Fixed seed for NumPy's legacy global random state, so any later code that
# draws from np.random gets the same sequence on every fresh-kernel run.
SEED = 2024
np.random.seed(seed=SEED)

In [2]:
# Load the AnnData object from disk and print its summary
# (dimensions plus the obs / var / uns keys it carries).
adata = sc.read_h5ad(filename="adata/redeem_young.h5ad")
print(adata)

AnnData object with n_obs × n_vars = 9144 × 2000
    obs: 'nCount_RNA', 'nFeature_RNA', 'nCount_ATAC', 'nFeature_ATAC', 'nCount_SCT', 'nFeature_SCT', 'SCT.weight', 'ATAC.weight', 'seurat_clusters', 'Sig.HSC1', 'Sig.Prog1', 'Sig.EarlyE1', 'Sig.LateE1', 'Sig.ProMono1', 'Sig.Mono1', 'Sig.ncMono1', 'Sig.cDC1', 'Sig.pDC1', 'Sig.ProB1', 'Sig.PreB1', 'Sig.B1', 'Sig.Plasma1', 'Sig.T1', 'Sig.CTL1', 'Sig.NK1', 'STD.CellType', 'STD_Cat', 'STD_Cat2', 'Sample', 'MitoCoverage', 'ClonalGroup', 'ClonalGroup.Prob', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'CellType', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition'
    var: 'name', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes'
    uns: 'CellType_colors', 'STD.CellType_colors', 'neighbors', 'umap', 'velocity_gra

In [3]:
# Convert the AnnData object into cellDancer's CSV input, written to
# `save_path`. The call is left as the cell's last expression so the returned
# table is displayed.
#   us_para        -> the 'Mu' / 'Ms' entries (presumably the unspliced /
#                     spliced moment layers -- confirm against the AnnData layers)
#   cell_type_para -> the obs column holding the labels; in this dataset it is
#                     'CellType' (not 'cell_type')
#   embed_para     -> the embedding exported alongside each cell
cdutil.adata_to_df_with_embed(
    adata,
    us_para=['Mu', 'Ms'],
    cell_type_para='CellType',
    embed_para='X_umap',
    save_path='sup/celldancer_input.csv',
)

100%|██████████| 2000/2000 [01:01<00:00, 32.76it/s]


Unnamed: 0,gene_name,unsplice,splice,cellID,clusters,embedding1,embedding2
0,MXRA8,0.000000,0.0,AAACAGCCAAAGCTCC-2,EryP,-4.173855,14.998200
1,MXRA8,0.000000,0.0,AAACAGCCAACTAGGG-2,MDP,3.079441,12.646513
2,MXRA8,0.000000,0.0,AAACAGCCAGGCATCT-2,MEP,-7.128927,12.837410
3,MXRA8,0.000000,0.0,AAACAGCCATTAAAGG-2,GMP,-0.332896,3.222512
4,MXRA8,0.000000,0.0,AAACAGCCATTAGGCC-2,CLP,10.282351,5.714820
...,...,...,...,...,...,...,...
18287995,RPS4Y2,0.021114,0.0,TTTGTGTTCTAAGTCA-2,MPP,0.921454,4.028267
18287996,RPS4Y2,0.223745,0.0,TTTGTGTTCTTAGCGG-2,ProB,15.117271,10.031981
18287997,RPS4Y2,0.039890,0.0,TTTGTGTTCTTAGGGT-2,MDP,2.394049,11.238746
18287998,RPS4Y2,0.026579,0.0,TTTGTTGGTGGTTCTT-2,CLP,10.887867,6.191710


In [4]:
# Re-read the cellDancer input table written by the export step.
df = pd.read_csv(filepath_or_buffer='sup/celldancer_input.csv')

# Run cellDancer's velocity estimation with 20 parallel jobs and
# speed_up disabled. It returns a loss table and the per-gene / per-cell
# estimation table.
loss_df, cellDancer_df = cd.velocity(
    df,
    n_jobs=20,
    speed_up=False,
)

# Persist the estimation table so later steps do not need to re-run the fit.
cellDancer_df.to_csv(path_or_buf='sup/celldancer_result.csv')

Using /home/liyr/Redeem/RNA_velocity_young2/cellDancer_velocity_2024-08-08 22-19-54 as the output path.
['MXRA8']
Arranging genes for parallel job.
2000  genes were arranged to  100  portions.


Not predicted gene list:['CALML6', 'AL136528.1', 'PIK3CD', 'RBP7', 'AL359771.1', 'CDA', 'EPHB2', 'AL590609.3', 'CATSPER4', 'SFN', 'PTAFR', 'AL009181.1', 'LINC01226', 'AC114489.1', 'GRIK3', 'AC093151.3', 'ARTN', 'DMBX1', 'AC093425.1', 'AC099792.1', 'NFIA', 'NFIA-AS2', 'FOXD3-AS1', 'LEPR', 'AC119800.1', 'PDE4B', 'WLS', 'NEGR1', 'AL513166.1', 'LINC02797', 'ST6GALNAC3', 'ADGRL2', 'LINC01725', 'LMO4', 'PKN2-AS1', 'LRRC8C', 'LINC02788', 'EVI5', 'ARHGAP29-AS1', 'SLC44A3', 'LINC01708', 'AMY2A', 'NBPF6', 'FNDC7', 'PHTF1', 'AC253572.2', 'AL356356.1', 'ADAMTSL4-AS1', 'CTSS', 'S100A9', 'CD5L', 'CD247', 'LINC00626', 'GORAB-AS1', 'BX284613.2', 'FMO2', 'RASAL2', 'ERVMER61-1', 'AL136372.2', 'RGS2', 'AL353072.2', 'NAV1', 'AC092800.1', 'IPO9-AS1', 'SLC26A9', 'IL24', 'FCMR', 'LAMB3', 'G0S2', 'LINC01740', 'AL592402.1', 'AL445423.1', 'AL359979.1', 'CDC42BPA', 'AL356010.2', 'RGS7', 'LINC02774', 'AC105450.1', 'LINC01250', 'AC007463.1', 'LINC00298', 'LINC00299', 'RRM2', 'MIR3681HG', 'KCNS3', 'LINC01376', 'GAL

In [11]:
# Sanity check: which genes went into cellDancer but are absent from its result?

# Per-gene row counts of the input table; the index holds the distinct gene names.
t1 = df['gene_name'].value_counts()

# Same for the estimation table. To inspect an earlier run instead, reload it
# first, e.g.:
#   cellDancer_df = pd.read_csv("cellDancer_velocity_2024-02-29 11-14-06/cellDancer_estimation.csv")
t2 = cellDancer_df['gene_name'].value_counts()

# Genes present in the input but missing from the output ("lost" genes).
t = set(t1.index.tolist()) - set(t2.index.tolist())

# Show the lost genes, then how many there are (one value per line).
print(t, len(t), sep="\n")

{'SGMS2', 'PCED1B', 'AL162493.1', 'AC079070.1', 'AC105265.3', 'SEMA5B', 'TRGV5', 'AL445423.1', 'PPM1N', 'CASC9', 'AC091010.1', 'ROBO2', 'FBXL7', 'GATA2', 'AC006974.2', 'PPFIBP1', 'AC021037.1', 'SYTL3', 'AL158817.1', 'AC011029.1', 'EPX', 'AC025437.2', 'AC005548.1', 'AC113418.1', 'IGHV1-2', 'ANKFN1', 'ELK3', 'AL513190.1', 'LINC01091', 'AC063943.1', 'EFNA5', 'AL162253.2', 'FAM241A', 'IL1RAP', 'DLGAP2', 'AC011337.1', 'AL359232.1', 'ABTB2', 'PDE4B', 'ME1', 'GSDMC', 'FFAR2', 'EDA', 'ANO3-AS1', 'SHOX2', 'KIAA1211L', 'PHTF1', 'PRKCA', 'CNTNAP2', 'WLS', 'RASAL2', 'AC099499.1', 'CXCL8', 'TLE1', 'EPB41L4A', 'AC079384.2', 'PIK3CD', 'LINC00163', 'PRSS2', 'DISC1FP1', 'LINC00968', 'GRIA2', 'TDO2', 'SVEP1', 'MIR222HG', 'AC025437.4', 'TRPV4', 'FLVCR2', 'AGMO', 'SLC7A14-AS1', 'CRPPA', 'ANKRD30B', 'STK10', 'CNTN4-AS1', 'TIMP3', 'S100A9', 'AC091078.1', 'CLEC1A', 'TLR2', 'PLXNA4', 'FGD4', 'AL646090.2', 'AC108156.1', 'MYO16', 'CALML6', 'CACNB2', 'VPREB3', 'PRDM9', 'AP003469.3', 'IGHA1', 'CFAP299', 'TSPOAP1-

In [8]:
# Merge the cellDancer estimates back into an AnnData object and print its summary.
adata_cd = export_velocity_to_dynamo(cellDancer_df, adata)
print(adata_cd)

# Replace the "velocity_S" layer with its dense array form (the layer is
# converted via .toarray(), i.e. it is expected to be a sparse matrix here).
adata_cd.layers["velocity_S"] = adata_cd.layers["velocity_S"].toarray()

# Save the annotated object for downstream analysis.
adata_cd.write_h5ad(filename="adata/cellDancer.h5ad")

AnnData object with n_obs × n_vars = 9144 × 2000
    obs: 'nCount_RNA', 'nFeature_RNA', 'nCount_ATAC', 'nFeature_ATAC', 'nCount_SCT', 'nFeature_SCT', 'SCT.weight', 'ATAC.weight', 'seurat_clusters', 'Sig.HSC1', 'Sig.Prog1', 'Sig.EarlyE1', 'Sig.LateE1', 'Sig.ProMono1', 'Sig.Mono1', 'Sig.ncMono1', 'Sig.cDC1', 'Sig.pDC1', 'Sig.ProB1', 'Sig.PreB1', 'Sig.B1', 'Sig.Plasma1', 'Sig.T1', 'Sig.CTL1', 'Sig.NK1', 'STD.CellType', 'STD_Cat', 'STD_Cat2', 'Sample', 'MitoCoverage', 'ClonalGroup', 'ClonalGroup.Prob', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'CellType', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition'
    var: 'name', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes', 'use_for_dynamics', 'use_for_transition'
    uns: 'CellType_colors', 'STD.CellType_c

In [10]:
# Restrict the object to genes that have a usable velocity estimate.
v = adata_cd.layers["velocity_S"]

# Per-gene (column) masks over the velocity matrix:
#   cols_to_keep_na -> the column contains no NaN
#   cols_to_keep_0  -> the column has at least one non-zero entry
cols_to_keep_na = ~np.isnan(v).any(axis=0)
cols_to_keep_0 = (v != 0).any(axis=0)

# Keep only genes that satisfy both masks; slicing returns a view of adata_cd.
adata_sub = adata_cd[:, cols_to_keep_na & cols_to_keep_0]

print(adata_sub)

View of AnnData object with n_obs × n_vars = 9144 × 1003
    obs: 'nCount_RNA', 'nFeature_RNA', 'nCount_ATAC', 'nFeature_ATAC', 'nCount_SCT', 'nFeature_SCT', 'SCT.weight', 'ATAC.weight', 'seurat_clusters', 'Sig.HSC1', 'Sig.Prog1', 'Sig.EarlyE1', 'Sig.LateE1', 'Sig.ProMono1', 'Sig.Mono1', 'Sig.ncMono1', 'Sig.cDC1', 'Sig.pDC1', 'Sig.ProB1', 'Sig.PreB1', 'Sig.B1', 'Sig.Plasma1', 'Sig.T1', 'Sig.CTL1', 'Sig.NK1', 'STD.CellType', 'STD_Cat', 'STD_Cat2', 'Sample', 'MitoCoverage', 'ClonalGroup', 'ClonalGroup.Prob', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'CellType', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition'
    var: 'name', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes', 'use_for_dynamics', 'use_for_transition'
    uns: 'CellType_colors', 'STD.Ce