In [1]:
import os, pickle


with open('cell_wise_correlations.pkl', 'rb') as f:
    cell_wise_correlations = pickle.load(f)


In [6]:
cell_wise_correlations["forebrain"][('celldancer', 'scvelo')]

array([        nan,  0.10966198, -0.1867114 , ...,         nan,
               nan,         nan])

In [7]:
import scanpy as sc

In [12]:
import scanpy as sc
import numpy as np
import scvelo as scv

# Load the two AnnData objects
scvelo = sc.read_h5ad("/mnt/data2/home/leonardo/git/multilineage_velocity/benchmark/scvelo/pancreas/scvelo_pancreas.h5ad")
ivelo = sc.read_h5ad("/mnt/data2/home/leonardo/git/multilineage_velocity/benchmark/ivelo/pancreas/ivelo_pancreas.h5ad")

# Step 1: Extract the velocity matrices
velocity_scvelo = scvelo.layers['velocity']
velocity_ivelo = ivelo.layers['velocity']

# Step 2: Remove genes with NaN values in each model individually
# Keep cells but remove genes that have NaN values in either model
nan_mask_scvelo_genes = ~np.isnan(velocity_scvelo).any(axis=0)  # Remove genes with NaNs in scvelo
nan_mask_ivelo_genes = ~np.isnan(velocity_ivelo).any(axis=0)  # Remove genes with NaNs in ivelo

# Subset the velocity matrices to exclude genes with NaNs
velocity_scvelo_filtered = velocity_scvelo[:, nan_mask_scvelo_genes]
velocity_ivelo_filtered = velocity_ivelo[:, nan_mask_ivelo_genes]

# Update the gene names to reflect the filtered matrices
scvelo_genes_filtered = scvelo.var_names[nan_mask_scvelo_genes]
ivelo_genes_filtered = ivelo.var_names[nan_mask_ivelo_genes]

# Step 3: Find the intersection of the filtered genes
shared_genes = scvelo_genes_filtered.intersection(ivelo_genes_filtered)
shared_cells = scvelo.obs_names.intersection(ivelo.obs_names)

print(f"Number of shared genes after filtering: {len(shared_genes)}")
print(f"Number of shared cells: {len(shared_cells)}")

# Step 4: Subset both AnnData objects to the shared genes and cells
scvelo_shared = scvelo[shared_cells, shared_genes].copy()
ivelo_shared = ivelo[shared_cells, shared_genes].copy()

# Extract the filtered velocity matrices for shared genes and cells
velocity_scvelo_shared = scvelo_shared.layers['velocity']
velocity_ivelo_shared = ivelo_shared.layers['velocity']

# Step 5: Compute gene-wise correlations (across shared cells)
if velocity_scvelo_shared.shape[1] > 0:  # Ensure there are valid genes remaining
    gene_corr = scv.utils.vcorrcoef(velocity_scvelo_shared, velocity_ivelo_shared, axis=1)
    print(f"Gene-wise correlation shape: {gene_corr.shape}")
else:
    print("No valid genes remain after filtering. Cannot compute gene-wise correlations.")

# Step 6: Compute cell-wise correlations (across shared genes)
if velocity_scvelo_shared.shape[0] > 0:  # Ensure there are valid cells remaining
    cell_corr = scv.utils.vcorrcoef(velocity_scvelo_shared, velocity_ivelo_shared, axis=0)
    print(f"Cell-wise correlation shape: {cell_corr.shape}")
else:
    print("No valid cells remain after filtering. Cannot compute cell-wise correlations.")


Number of shared genes after filtering: 925
Number of shared cells: 3696
Gene-wise correlation shape: (3696,)
Cell-wise correlation shape: (925,)


: 

In [9]:
adata

AnnData object with n_obs × n_vars = 3696 × 2000
    obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'isomap_1', 'isomap_2', 'isomap_3', 'isomap_1+2', 'isomap_1+3', 'isomap_2+3', 'pca_1', 'pca_2', 'pca_3', 'pca_1+2', 'pca_1+3', 'pca_2+3', 'isomap_1_ve', 'isomap_2_ve', 'isomap_3_ve', 'isomap_1_MuMs', 'isomap_2_MuMs', 'isomap_3_MuMs', 'isomap_1_z', 'isomap_2_z', 'isomap_3_z', 'isomap_1_velocity', 'isomap_2_velocity', 'isomap_3_velocity', 'isomap_1_velocity_concat', 'isomap_2_velocity_concat', 'isomap_3_velocity_concat', 'isomap_1_PCA', 'isomap_2_PCA', 'isomap_3_PCA', 'velocity_self_transition', 'velocity_length', 'velocity_confidence', 'velocity_confidence_transition', 'root_cells', 'end_points', 'velocity_pseudotime'
    var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scali