In [2]:
import latentvelo as ltv
import numpy as np
import scanpy as sc
import scvelo as scv
import pandas as pd
import gc
import time
# Fix NumPy's global RNG state before anything stochastic runs.
np.random.seed(2024)

# Load the Forebrain AnnData object from disk.
adata = scv.read("/home/liyr/hpz/real_data_graph/Forebrain/LatentVelo.h5ad")

# Flag every gene as a velocity gene (scalar broadcast over all rows of .var).
adata.var['velocity_genes'] = True

# Re-encode the cluster labels as an ordered categorical following the
# listed order. Labels absent from this list become NaN.
cluster_order = ['Radial Glia', 'Neuroblast','Immature Neuron', 'Neuron']
adata.obs['clusters'] = pd.Categorical(
    adata.obs['clusters'],
    categories=cluster_order,
    ordered=True,
)

# Disabled: derive a normalised pseudo experimental time from the cluster order.
#adata.obs['exp_time'] = np.array(adata.obs['clusters'].cat.codes)
#adata.obs['exp_time'] = adata.obs['exp_time']/adata.obs['exp_time'].max()


# Per-cell library sizes (row sums of the count layers), stored in .obs as
# size factors.
spliced_key = 'spliced'
unspliced_key = 'unspliced'


def _library_sizes(layer):
    """Return the per-row totals of `layer` as an (n_rows, 1) ndarray.

    `layer.sum(1)` yields a 1-D ndarray for a dense array but an (n, 1)
    `np.matrix` for a scipy sparse matrix; `np.asarray(...).reshape(-1, 1)`
    normalises both to a plain 2-D column ndarray.
    """
    return np.asarray(layer.sum(1)).reshape(-1, 1)


spliced_library_sizes = _library_sizes(adata.layers[spliced_key])
unspliced_library_sizes = _library_sizes(adata.layers[unspliced_key])

# Assign flat 1-D vectors so the obs columns do not depend on pandas
# accepting a 2-D / np.matrix value for a single column.
adata.obs['spliced_size_factor'] = spliced_library_sizes.ravel()
adata.obs['unspliced_size_factor'] = unspliced_library_sizes.ravel()

# Build the LatentVelo VAE; `observed` is the number of genes.
n_genes = adata.n_vars
model = ltv.models.VAE(observed=n_genes)

# Train the model; the call returns three values, unpacked below.
# NOTE(review): the run name is 'simulation_mono' although the data loaded
# here is the Forebrain dataset -- presumably a leftover; confirm what `name`
# controls before reusing it.
training_history = ltv.train(model, adata, name='simulation_mono')
epochs, val_ae, val_traj = training_history

# Collect the model outputs; `adata` is rebound to the second returned object.
results = ltv.output_results(model, adata, gene_velocity=True)
latent_adata, adata = results

# Per-cell latent trajectories and their matching time values.
z_traj, times = ltv.cell_trajectories(model, adata)

2000 velocity genes used
epoch 0, full loss 342.208, val loss 264.259, recon MSE 5.277, traj MSE 2.452, reg loss -2.041
epoch 1, full loss 255.098, val loss 201.874, recon MSE 1.817, traj MSE 1.509, reg loss -2.330
epoch 2, full loss 208.012, val loss 173.425, recon MSE 2.008, traj MSE 1.149, reg loss -2.495
epoch 3, full loss 181.666, val loss 146.723, recon MSE 1.656, traj MSE 1.070, reg loss -2.558
epoch 4, full loss 146.250, val loss 119.723, recon MSE 1.838, traj MSE 1.045, reg loss -2.661
epoch 5, full loss 116.983, val loss 93.207, recon MSE 1.686, traj MSE 1.036, reg loss -2.720
epoch 6, full loss 85.141, val loss 66.199, recon MSE 1.613, traj MSE 1.032, reg loss -2.815
epoch 7, full loss 54.482, val loss 39.294, recon MSE 1.398, traj MSE 1.030, reg loss -2.887
epoch 8, full loss 23.737, val loss 10.555, recon MSE 1.219, traj MSE 1.030, reg loss -2.060
epoch 9, full loss -6.799, val loss -18.408, recon MSE 1.073, traj MSE 1.012, reg loss -1.879
epoch 10, full loss -38.179, val 

In [3]:
# Expose the 'velo' layer under the key 'velocity' as well.
# NOTE(review): 'velo' is presumably written by ltv.output_results above -- confirm.
velo_layer = adata.layers['velo']
adata.layers['velocity'] = velo_layer

In [4]:
np.random.seed(2024)


def _cluster_trajectory_frame(cluster_name):
    """Build a gene-by-column DataFrame of trajectories for one cluster.

    Selects the positional indices of the cells in `latent_adata` whose
    `clusters` label equals `cluster_name`, passes their latent trajectories
    and times to `ltv.tl.cell_trajectories` (last argument fixed at 0.25),
    and labels every resulting row with the cluster name.
    """
    cell_idx = np.flatnonzero(latent_adata.obs.clusters.isin([cluster_name]).to_numpy())
    # Optional down-sampling to 100 cells per cluster (disabled):
    #cell_idx = cell_idx[np.random.choice(len(cell_idx), size=100, replace=False)]
    xhat = ltv.tl.cell_trajectories(
        z_traj[cell_idx], times[cell_idx], latent_adata, adata, cell_idx, 0.25
    )
    frame = pd.DataFrame(xhat, columns=adata.var.index.values)
    frame['clusters'] = cluster_name
    return frame


# One frame per cluster, in this fixed order, then stacked row-wise.
# The per-cluster row index is kept as-is (each frame restarts at 0).
list_df = [
    _cluster_trajectory_frame(cluster_name)
    for cluster_name in ['Immature Neuron','Neuroblast','Neuron','Radial Glia']
]
df = pd.concat(list_df)
df

Unnamed: 0,CDK11B,LINC00982,CEP104,RERE,ENO1,LZIC,MTOR,DHRS3,ATP13A2,RCC2,...,WDR44,GRIA3,RAB33A,LINC00632,AFF2,HMGB3,GDI1,PCDH11Y,DDX3Y,clusters
0,0.016512,0.000000,0.028449,0.090379,0.186424,0.027977,0.027892,0.000000,0.130918,0.020485,...,0.025961,0.074761,0.112933,0.005152,0.013255,0.166676,0.412796,0.000000,0.026657,Immature Neuron
1,0.016512,0.000000,0.028449,0.090379,0.186424,0.027977,0.027892,0.000000,0.130918,0.020485,...,0.025961,0.074761,0.112933,0.005152,0.013255,0.166676,0.412796,0.000000,0.026657,Immature Neuron
2,0.026353,0.000000,0.040429,0.147367,0.166600,0.058250,0.030922,0.000000,0.092936,0.011462,...,0.024960,0.102460,0.167599,0.004626,0.005686,0.179902,0.344754,0.003223,0.032272,Immature Neuron
3,0.026353,0.000000,0.040429,0.147367,0.166600,0.058250,0.030922,0.000000,0.092936,0.011462,...,0.024960,0.102460,0.167599,0.004626,0.005686,0.179902,0.344754,0.003223,0.032272,Immature Neuron
4,0.038448,0.000000,0.028653,0.105894,0.187071,0.093507,0.015828,0.000000,0.080902,0.025393,...,0.031350,0.110897,0.186254,0.009699,0.008310,0.142592,0.331819,0.006758,0.047084,Immature Neuron
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16099,0.026258,0.004295,0.042906,0.157160,0.452454,0.085320,0.012349,0.069111,0.041184,0.040413,...,0.000000,0.012109,0.018822,0.022021,0.000000,0.231765,0.131174,0.000000,0.068234,Radial Glia
16100,0.019938,0.005847,0.016291,0.086376,0.577310,0.048018,0.017231,0.125206,0.032764,0.073434,...,0.000000,0.000000,0.017502,0.021853,0.009320,0.212488,0.087141,0.000000,0.015771,Radial Glia
16101,0.019938,0.005847,0.016291,0.086376,0.577310,0.048018,0.017231,0.125206,0.032764,0.073434,...,0.000000,0.000000,0.017502,0.021853,0.009320,0.212488,0.087141,0.000000,0.015771,Radial Glia
16102,0.019938,0.005847,0.016291,0.086376,0.577310,0.048018,0.017231,0.125206,0.032764,0.073434,...,0.000000,0.000000,0.017502,0.021853,0.009320,0.212488,0.087141,0.000000,0.015771,Radial Glia


In [5]:
np.random.seed(2024)

# Run the per-cluster DE test on the trajectory frame `df` and keep only the
# rows whose p-value is below 0.05.
# NOTE(review): the threshold is applied to the `pval` column as returned;
# whether ltv.tl.de_genes already corrects for multiple testing is not
# visible here -- confirm.
list_degs = []
for cluster_name in ('Immature Neuron','Neuroblast','Neuron','Radial Glia'):
    cluster_de = ltv.tl.de_genes(adata, df, cluster_name, celltype_key='clusters', mode='greater')
    cluster_de['clusters'] = cluster_name
    is_significant = cluster_de["pval"] < 0.05
    list_degs.append(cluster_de[is_significant])

# Stack the significant genes of all clusters into one table.
degs_all = pd.concat(list_degs)

In [6]:
# Write the combined significant-gene table to disk as CSV.
degs_csv_path = "/home/liyr/zxc/top_like_genes/res/LatentVelo_all_new_0.25.csv"
degs_all.to_csv(degs_csv_path)