In [1]:
from pathlib import Path

import numpy as np
import scanpy as sc
import scvelo as scv
from scipy.sparse.csgraph import dijkstra
from sklearn.neighbors import kneighbors_graph

from utils import color_keys

In [2]:
# Function to min-max normalize columns of a matrix
def min_max_normalize_columns(matrix):
    """Rescale each column of ``matrix`` to the [0, 1] interval.

    Every column is shifted by its minimum and divided by its value range
    (max - min). Columns whose values are all identical have a range of
    zero; their divisor is replaced by 1, so such columns come out as all
    zeros instead of triggering a division by zero.

    Parameters
    ----------
    matrix : array-like, 2-D
        Values to normalize; statistics are taken along axis 0.

    Returns
    -------
    The normalized values as a new object with the same shape as the
    input; ``matrix`` itself is not modified.
    """
    column_floor = np.min(matrix, axis=0)
    column_span = np.max(matrix, axis=0) - column_floor
    # A zero span marks a constant column: divide by 1 there instead.
    column_span[column_span == 0] = 1
    return (matrix - column_floor) / column_span

In [3]:
# Load the pancreas dataset, filter/normalize genes and compute the
# first-order moments (this adds the "Ms" and "Mu" layers).
adata = scv.datasets.pancreas()
scv.pp.filter_and_normalize(adata, n_top_genes=2000, min_shared_counts=20)
scv.pp.moments(adata, n_neighbors=200)

# Keep a copy of the existing UMAP coordinates under a separate key.
adata.obsm["old_umap"] = adata.obsm["X_umap"].copy()

# Min-max normalize the columns of both moment layers in place
# ("Mu" first, then "Ms").
moment_layers = ("Mu", "Ms")
for layer_name in moment_layers:
    adata.layers[layer_name] = min_max_normalize_columns(adata.layers[layer_name])

# Stack the normalized layers side by side (Mu columns, then Ms columns)
# and store the combined matrix as a per-cell representation.
Mu_Ms = np.concatenate(
    [adata.layers[layer_name] for layer_name in moment_layers],
    axis=1,
)
adata.obsm["Mu_Ms"] = Mu_Ms

# The string literal below is disabled exploratory code (neighbors + UMAP on
# the "Mu_Ms" representation); as a bare expression it has no effect.
"""sc.pp.neighbors(adata, use_rep="Mu_Ms")
sc.tl.umap(adata)
sc.pl.umap(adata, color="clusters")"""

cell_type_key = "clusters"
# color_keys is a project helper imported from utils; presumably it attaches
# colour information for the given annotation key — TODO confirm.
adata = color_keys(adata, cell_type_key)
neighbors = 15
# NOTE(review): `indices` is never read in this cell; it is kept only in case
# a later cell still references it.
indices = np.zeros((adata.shape[0], neighbors+1))

# Dense per-cell feature matrix: unspliced columns followed by spliced columns.
# NOTE(review): the graph is built from the "unspliced"/"spliced" layers, not
# from the min-max normalized moments stored in obsm["Mu_Ms"] — confirm that
# this is intended.
u = adata.layers["unspliced"].toarray()
s = adata.layers["spliced"].toarray()
u_s = np.concatenate([u, s], axis=1)

# Step 1: Create a nearest-neighbor graph (Euclidean distances for the graph construction)
knn_graph = kneighbors_graph(u_s, n_neighbors=neighbors, mode='distance', include_self=False)

# Step 2: Compute the geodesic distances using Dijkstra's algorithm
# This returns the shortest paths between all pairs of points (a dense
# n_cells x n_cells matrix), treating the kNN graph as undirected.
geodesic_distances = dijkstra(csgraph=knn_graph, directed=False, return_predecessors=False)

# Step 3: Build the neighbor matrix
# For each cell, keep the `neighbors + 1` cells with the smallest geodesic
# distance; the cell itself (distance 0) is normally the first entry.
# A single row-wise argsort replaces the per-row Python loop; `.copy()` detaches
# the slice so the full n_cells x n_cells index array can be freed instead of
# being kept alive through adata.uns.
local_nearest_neighbor_matrix = np.argsort(geodesic_distances, axis=1)[:, :neighbors + 1].copy()
assert local_nearest_neighbor_matrix.shape == (adata.shape[0], neighbors + 1)

adata.uns["indices"] = local_nearest_neighbor_matrix

# Persist the processed object. The output folder is created first so the
# write does not fail when the directory is missing (e.g. on a fresh checkout).
output_path = Path("pancreas_common_smoothing_minmax/pancreas_minmax.h5ad")
output_path.parent.mkdir(parents=True, exist_ok=True)
adata.write(output_path)


Filtered out 20801 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
computing neighbors


  log1p(adata)


    finished (0:00:04) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:02) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
