In [1]:
import velvet as vt

# general packages
import numpy as np
import pandas as pd
import torch
from scipy.sparse import issparse

# velocity packages
import scanpy as sc
import scvelo as scv
import anndata as ann

# plotting packages
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm, trange
from IPython.display import clear_output

# color palette object
from colors import colorpalette as colpal

Global seed set to 0
  new_rank_zero_deprecation(
  return new_rank_zero_deprecation(*args, **kwargs)


In [15]:
from sklearn.metrics.pairwise import cosine_similarity


In [26]:
dataset = ['mini_V3', 'mini_MN', 'mini_MD',
           'midi_NM', 'midi_Ne', 'maxi']


def _paired_cosine(a, b):
    """Return the cosine similarity between matching rows of `a` and `b`.

    Row ``k`` of the result is ``cos(a[k], b[k])``. This equals the diagonal
    of ``cosine_similarity(a, b)`` but never materialises the full
    n_rows x n_rows matrix, so time and memory stay linear in the number of
    rows.

    Rows with zero norm get a similarity of 0 (their norm is treated as 1),
    which is the same convention sklearn's ``cosine_similarity`` applies.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    dots = (a * b).sum(axis=1)
    norms = np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1)
    # Avoid 0/0: the dot product is already 0 whenever either norm is 0.
    norms[norms == 0] = 1.0
    return dots / norms


# One score per dataset: the mean, over rows, of the cosine similarity between
# the first 50 columns of the two velocity estimates stored in `obsm`.
scores = np.zeros(len(dataset))
for i, name in enumerate(dataset):
    adata = sc.read_h5ad(f'../../data/benchmarking/{name}.h5ad')
    cr = adata.obsm['velocity_cr_pca'][:,:50]
    pt = adata.obsm['velocity_pst'][:,:50]
    score = _paired_cosine(cr, pt).mean()
    scores[i] = score

np.save('../../output_data/benchmarking_scores/positive_control.npy', scores)

In [55]:
def _project_pca_velocity(adata, velocity_key, n_pcs=50):
    """Map a PCA-space velocity stored in ``adata.obsm`` back to gene space.

    The first ``n_pcs`` columns of ``adata.obsm[velocity_key]`` are multiplied
    by the transpose of the first ``n_pcs`` columns of
    ``adata.varm['precomputed_PCs']``.

    This is algebraically the same as projecting ``X_pca`` and
    ``X_pca + velocity`` separately and subtracting, because the projection is
    linear. Computing it directly needs one dense output matrix instead of
    three, and avoids the rounding error of subtracting two nearly equal
    large matrices. ``adata.obsm['X_pca']`` is therefore not read.

    Raises:
        ValueError: if the velocity and the loadings do not have the same
            number of components after truncation to ``n_pcs``.
    """
    velocity = adata.obsm[velocity_key][:, :n_pcs]
    loadings = adata.varm['precomputed_PCs'][:, :n_pcs]
    if velocity.shape[1] != loadings.shape[1]:
        raise ValueError(
            f"obsm[{velocity_key!r}] has {velocity.shape[1]} components but "
            f"varm['precomputed_PCs'] has {loadings.shape[1]}"
        )
    return velocity @ loadings.T


def CR_pipeline(adata, name):
    """Gene-space velocity reconstructed from ``adata.obsm['velocity_cr_pca']``.

    Args:
        adata: object exposing ``obsm`` and ``varm`` mappings of arrays.
        name: dataset name; unused here, accepted so the function matches the
            ``pipeline(adata, name)`` call signature.

    Returns:
        Array with one row per row of the velocity and one column per row of
        ``varm['precomputed_PCs']``.
    """
    return _project_pca_velocity(adata, 'velocity_cr_pca')


def PT_pipeline(adata, name):
    """Gene-space velocity reconstructed from ``adata.obsm['velocity_pst']``.

    Args:
        adata: object exposing ``obsm`` and ``varm`` mappings of arrays.
        name: dataset name; unused here, accepted so the function matches the
            ``pipeline(adata, name)`` call signature.

    Returns:
        Array with one row per row of the velocity and one column per row of
        ``varm['precomputed_PCs']``.
    """
    return _project_pca_velocity(adata, 'velocity_pst')

In [53]:
def gene_specific_benchmark(output_folder, pipeline, pipeline_name,
                            data_folder='../../data/benchmarking'):
    """Score a velocity pipeline on a fixed table of expected gene trends.

    For every dataset in the table, the file ``{data_folder}/{name}.h5ad`` is
    loaded, ``pipeline(adata, name)`` is stored in ``adata.layers['velocity']``,
    and each table row yields one score: the fraction of cells in the selected
    group whose velocity for the gene has the expected sign (``> 0`` for
    ``'+'``, ``< 0`` for ``'-'``). A group that selects no cells yields NaN,
    since the score is the mean of an empty array.

    The scores are saved, in table order, to
    ``{output_folder}/{pipeline_name}_gene_specific_scores.npy``.

    Args:
        output_folder: directory the ``.npy`` file is written to.
        pipeline: callable ``pipeline(adata, name)`` whose result is assigned
            to ``adata.layers['velocity']``.
        pipeline_name: prefix of the output file name.
        data_folder: directory containing the ``{name}.h5ad`` inputs.

    Raises:
        ValueError: if a table row has a direction other than ``'+'``/``'-'``.
    """
    # Each row: [obs column, group value in that column, gene, expected sign].
    gene_results = {
        'mini_MN':[
            ['leiden','4','Olig2','-'],
            ['leiden','4','Tubb3','+'],
            ['leiden','2','Neurog2','+'],
            ['leiden','3','Isl2','+'],
        ],
        'mini_V3':[
            ['leiden','1','Sim1','+'],
            ['leiden','1','Sox2','-'],
            ['leiden','3','Tubb3','+'],
            ['leiden','1','Map2','+'],
        ],
        'mini_MD':[
            ['leiden','3','Sox2','-'],
            ['leiden','3','Nkx1-2','-'],
            ['leiden','3','T','-'],
            ['leiden','2','Meox1','+'],
        ],
        'midi_NM':[
            ['cell_annotation','Neural','Olig2','+'],
            ['cell_annotation','Neural','T','-'],
            ['cell_annotation','Mesoderm','Meox1','+'],
            ['cell_annotation','Early_Neural','Irx3','+'],
        ],
        'midi_Ne':[
            ['cell_annotation','Neural','Olig2','+'],
            ['cell_annotation','FP','Shh','+'],
            ['cell_annotation','P3','Nkx2-2','+'],
            ['cell_annotation','pMN','Irx3','-'],
        ]
    }

    scores = []
    for name, settings in gene_results.items():
        print(f"GENE SCORE: {name}")
        adata = sc.read_h5ad(f'{data_folder}/{name}.h5ad')
        adata.layers['velocity'] = pipeline(adata, name)

        for group_key, group, gene, direction in settings:
            # Fail loudly on a malformed row; otherwise the previous row's
            # score would be appended again (or `score` would be undefined).
            if direction not in ('+', '-'):
                raise ValueError(
                    f"{name}/{gene}: direction must be '+' or '-', "
                    f"got {direction!r}"
                )
            sub = adata[adata.obs[group_key] == group]
            vel = sub[:, gene].layers['velocity'].flatten()
            if direction == '-':
                score = np.mean(vel < 0)
            else:
                score = np.mean(vel > 0)
            scores.append(score)

    scores = np.array(scores)
    np.save(f'{output_folder}/{pipeline_name}_gene_specific_scores.npy', scores)

In [56]:
# Run the gene-specific benchmark once per pipeline, printing a banner before
# each run. Both runs write their scores into the same output folder.
for banner, pipeline_fn, label in (
    ("CR...", CR_pipeline, 'CR_POSCONTROL'),
    ("PT...", PT_pipeline, 'PT_POSCONTROL'),
):
    print(banner)
    gene_specific_benchmark(
        output_folder='../../output_data/benchmarking_scores',
        pipeline=pipeline_fn,
        pipeline_name=label,
    )

CR...
GENE SCORE: mini_MN
GENE SCORE: mini_V3
GENE SCORE: mini_MD
GENE SCORE: midi_NM
GENE SCORE: midi_Ne
PT...
GENE SCORE: mini_MN
GENE SCORE: mini_V3
GENE SCORE: mini_MD
GENE SCORE: midi_NM
GENE SCORE: midi_Ne
