# Lab 3: Diffusion Pseudotime

**Module 3** - Inferring Pseudotime with Diffusion Maps

## Objectives
- Compute diffusion maps
- Select root cells
- Calculate diffusion pseudotime
- Validate ordering with markers


In [None]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt

sc.settings.verbosity = 3
sc.settings.set_figure_params(dpi=100, facecolor='white')

# Load the pancreas development dataset, which has a clear differentiation
# trajectory. scvelo is imported lazily so the notebook still runs without it.
try:
    import scvelo as scv
    adata = scv.datasets.pancreas()
except Exception as err:
    # `except Exception` (rather than a bare `except:`) keeps Ctrl-C and
    # SystemExit working while still covering a missing scvelo install or a
    # failed download. Report the reason so the fallback is not silent.
    print(f"Could not load pancreas dataset ({type(err).__name__}: {err})")

    # Fallback to PBMC
    adata = sc.datasets.pbmc3k_processed()
    # The remaining cells read adata.obs['clusters']; the PBMC dataset keeps
    # its labels under 'louvain', so expose them under the expected name.
    if 'clusters' not in adata.obs and 'louvain' in adata.obs:
        adata.obs['clusters'] = adata.obs['louvain']
    print("Using PBMC data (for demo only - not ideal for trajectory)")
else:
    print(f"Cells: {adata.n_obs}, Genes: {adata.n_vars}")
    print(f"Cell types: {adata.obs['clusters'].unique().tolist()}")


In [None]:
# Preprocessing for trajectory analysis
# NOTE(review): these steps look intended for raw counts. If the PBMC fallback
# (sc.datasets.pbmc3k_processed) was loaded instead of the pancreas data, that
# dataset is presumably already normalized/log-transformed - confirm before
# re-running normalization on it.

# Drop genes detected in fewer than 3 cells
sc.pp.filter_genes(adata, min_cells=3)
# Normalize per-cell totals, then log(1 + x) transform
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
# Flag the 2000 most variable genes
sc.pp.highly_variable_genes(adata, n_top_genes=2000)
# PCA, then a neighbor graph built on the first 30 principal components
sc.pp.pca(adata)
sc.pp.neighbors(adata, n_pcs=30)
# 2D embedding for visualization
sc.tl.umap(adata)

# UMAP colored by the 'clusters' annotation in adata.obs
sc.pl.umap(adata, color='clusters', title='Cell Types')


In [None]:
# Diffusion map embedding of the cells
sc.tl.diffmap(adata)

# Scatter the diffusion components pairwise (1 vs 2, then 2 vs 3),
# colored by the 'clusters' annotation
diffmap_component_pairs = ['1,2', '2,3']
sc.pl.diffmap(adata, components=diffmap_component_pairs, color='clusters')


In [None]:
# Pick the root cell for pseudotime.
# The root should come from the progenitor population; for the pancreas data
# the Ductal (or Ngn3 low EP) cells are the early ones.

cluster_labels = adata.obs['clusters']

# Prefer 'Ductal' when present; otherwise fall back to the most frequent cluster
root_cluster = (
    'Ductal'
    if 'Ductal' in cluster_labels.values
    else cluster_labels.value_counts().index[0]
)

# Names of all cells in the root cluster; the first one becomes the root
root_cells = adata.obs.index[cluster_labels == root_cluster]
first_root_name = root_cells[0]

# Translate the cell name into its position within adata.obs
root_idx = adata.obs.index.get_loc(first_root_name)

# Store the root position under the 'iroot' key
adata.uns['iroot'] = root_idx

print(f"Root cell cluster: {root_cluster}")
print(f"Root cell index: {root_idx}")


In [None]:
# Diffusion pseudotime; the result is plotted below from 'dpt_pseudotime'
sc.tl.dpt(adata)

# Side-by-side UMAPs: pseudotime on the left, cell types on the right
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

umap_panels = (
    ('dpt_pseudotime', 'Diffusion Pseudotime'),
    ('clusters', 'Cell Types'),
)
for panel_ax, (color_key, panel_title) in zip(axes, umap_panels):
    # show=False defers rendering so both panels land in the same figure
    sc.pl.umap(adata, color=color_key, ax=panel_ax, show=False,
               title=panel_title)

plt.tight_layout()
plt.show()


In [None]:
# Validate pseudotime with cell type order.
# observed=True restricts the groups to clusters that actually occur, which
# avoids NaN rows for unused categories and the pandas FutureWarning about the
# changing default when 'clusters' is categorical.
pt_by_cluster = (
    adata.obs.groupby('clusters', observed=True)['dpt_pseudotime']
    .mean()
    .sort_values()
)
print("Mean pseudotime by cluster (should follow differentiation):")
print(pt_by_cluster)

# Violin plot of pseudotime by cluster
sc.pl.violin(adata, keys='dpt_pseudotime', groupby='clusters', rotation=45)
