In [1]:
import os

import numpy as np
import pandas as pd
import scanpy as sc
import scvelo as scv
import seaborn as sns
import sklearn
import sklearn.model_selection
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import vae_from_deepvelo_SA as dv

# Reproducibility: a single seed shared by TensorFlow and NumPy's global RNG.
SEED = 2024
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Last expression of the cell: displays the GPUs TensorFlow can see.
tf.config.list_physical_devices('GPU') # check GPU

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [2]:
# Load the dataset from disk.
h5ad_path = "adata/redeem_young.h5ad"
adata = sc.read_h5ad(h5ad_path)

# Flag every gene as a velocity gene, so the boolean mask
# adata.var["velocity_genes"] used by later cells selects all columns.
adata.var["velocity_genes"] = True

# Independent snapshot taken *after* the flag is set; later cells read the
# expression matrix from this copy.
adata_raw = adata.copy()

print(adata)

AnnData object with n_obs × n_vars = 9144 × 2000
    obs: 'nCount_RNA', 'nFeature_RNA', 'nCount_ATAC', 'nFeature_ATAC', 'nCount_SCT', 'nFeature_SCT', 'SCT.weight', 'ATAC.weight', 'seurat_clusters', 'Sig.HSC1', 'Sig.Prog1', 'Sig.EarlyE1', 'Sig.LateE1', 'Sig.ProMono1', 'Sig.Mono1', 'Sig.ncMono1', 'Sig.cDC1', 'Sig.pDC1', 'Sig.ProB1', 'Sig.PreB1', 'Sig.B1', 'Sig.Plasma1', 'Sig.T1', 'Sig.CTL1', 'Sig.NK1', 'STD.CellType', 'STD_Cat', 'STD_Cat2', 'Sample', 'MitoCoverage', 'ClonalGroup', 'ClonalGroup.Prob', 'nCount_spliced', 'nFeature_spliced', 'nCount_unspliced', 'nFeature_unspliced', 'nCount_ambiguous', 'nFeature_ambiguous', 'CellType', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'velocity_self_transition'
    var: 'name', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'velocity_gamma', 'velocity_qreg_ratio', 'velocity_r2', 'velocity_genes'
    uns: 'CellType_colors', 'STD.CellType_colors', 'neighbors', 'umap', 'velocity_gra

In [3]:
# Build the training set: the expression matrix stacked 5 times (first copy
# left clean, the four copies below it perturbed with Gaussian noise), paired
# with the equally tiled "velocity" layer as the regression target.
gene_mask = adata.var["velocity_genes"].to_numpy()
n_cells = adata_raw.shape[0]

# Densify once and reuse. `.toarray()` replaces the `.A` shorthand, which is
# only available on legacy scipy sparse matrices.
expr_dense = adata_raw.X.toarray()

X = np.tile(expr_dense[:, gene_mask], (5, 1))
Y = np.tile(adata.layers["velocity"][:, gene_mask], (5, 1))

# NOTE(review): this value is squared and then passed as the `scale`
# (standard deviation) argument of np.random.normal. Behaviour is kept
# unchanged here -- confirm that the squaring is intended.
noise_sigma = (expr_dense.std() / 70) ** 2

# Rows [0, n_cells) stay untouched; only the tiled copies receive noise.
# Draws from NumPy's global RNG, so the result depends on the seed set earlier.
X[n_cells:, :] += np.random.normal(0, noise_sigma, X[n_cells:, :].shape)

# Cache the arrays on disk. Positional arguments to savez are stored under the
# keys "arr_0" and "arr_1". The file name spelling is kept as-is because other
# code may read this exact path.
XYpath = "sup/DeepVelo_SA_prepropcessed.npz"
os.makedirs(os.path.dirname(XYpath), exist_ok=True)  # fresh checkouts lack "sup/"
np.savez(XYpath, X, Y)

# Read the cache back through a context manager so the archive is opened once
# and its file handle is closed afterwards.
with np.load(XYpath) as cached_xy:
    X = cached_xy["arr_0"]
    Y = cached_xy["arr_1"]

# 90/10 split; reuse the notebook-wide SEED (2024) instead of repeating the literal.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=SEED,
)


In [4]:
# Encoder and decoder are both sized by the number of input features (columns of X).
n_features = X.shape[1]
encoder = dv.create_encoder(n_features)
decoder = dv.create_decoder(n_features)

# Wrap both halves in the VAE model and compile it with Adam.
# Keras' default learning rate is 0.001; the DeepVelo tutorial uses 0.00005.
autoencoder = dv.VAE(encoder, decoder)
opt = keras.optimizers.Adam(learning_rate=0.00005)
autoencoder.compile(optimizer=opt)

# Stop once 'val_loss' has not improved for 3 consecutive epochs
# (same setting as the tutorial's figure 2).
es = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# Training settings below follow the tutorial's figure 2 configuration.
autoencoder.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=2,
    shuffle=True,
    callbacks=[es],
)

2024-08-12 16:09:47.154466: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-08-12 16:09:47.799487: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 33665 MB memory:  -> device: 0, name: NVIDIA A100-PCIE-40GB, pci bus id: 0000:31:00.0, compute capability: 8.0


Model: "encoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 2000)]       0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 64)           128064      input_1[0][0]                    
__________________________________________________________________________________________________
z_mean (Dense)                  (None, 16)           1040        dense[0][0]                      
__________________________________________________________________________________________________
z_log_var (Dense)               (None, 16)           1040        dense[0][0]                      
____________________________________________________________________________________________

2024-08-12 16:09:48.703979: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/100
   80/20574 [..............................] - ETA: 39s - loss: 137.0983 - reconstruction_loss: 0.2126 - kl_loss: 85.8465 

2024-08-12 16:09:50.058588: I tensorflow/stream_executor/cuda/cuda_blas.cc:1760] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100


<keras.callbacks.History at 0x2b3e008ab910>

In [5]:
# Run the trained model on the original cells only (no tiling, no added noise).
# Note that `X` is rebound here, replacing the training array of the same name.
# `.toarray()` replaces the `.A` shorthand, which is only available on legacy
# scipy sparse matrices.
X = adata_raw.X.toarray()[:, adata.var["velocity_genes"]]
velocity_deepvelo = autoencoder.predict(X)
print(velocity_deepvelo.shape)

# Store the prediction as a new layer next to the existing "velocity" layer.
adata.layers['velocity_dv'] = velocity_deepvelo

# Persist the annotated object, including the new layer.
adata.write_h5ad("adata/DeepVelo_SA.h5ad")

(9144, 2000)
