# Lab 5: Graph-Based Clustering

**Module 5** - Leiden Clustering

## Objectives
- Build k-NN and SNN graphs
- Run Leiden clustering
- Explore how the resolution parameter affects the number of clusters
- Evaluate cluster quality


In [None]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt

# Notebook-wide scanpy configuration: verbosity level 3 and
# 100-dpi figures drawn on a white background.
sc.settings.verbosity = 3
sc.set_figure_params(facecolor='white', dpi=100)

# Fetch scanpy's preprocessed pbmc3k dataset and report how many cells it holds
adata = sc.datasets.pbmc3k_processed()
print(f"Cells: {adata.n_obs}")


In [None]:
# Construct the neighbor graph: 15 neighbors per cell, using 40 PCs.
# The results are read back from adata.obsp below.
sc.pp.neighbors(adata, n_pcs=40, n_neighbors=15)
print("Neighbor graph built")

# Sanity-check the graph: matrix shape, then the average number of
# stored (non-zero) connectivity entries per row, i.e. per cell.
print(f"Graph shape: {adata.obsp['distances'].shape}")
print(f"Non-zero connections per cell: {adata.obsp['connectivities'].getnnz(axis=1).mean():.1f}")


In [None]:
# Sweep the Leiden resolution parameter.
# Each run writes its labels to a separate obs column named 'leiden_<resolution>',
# so all clusterings stay available side by side for later comparison.
resolutions = [0.1, 0.3, 0.5, 0.8, 1.0, 1.5, 2.0]

for res in resolutions:
    key = f'leiden_{res}'
    sc.tl.leiden(adata, key_added=key, resolution=res)
    # Count the distinct cluster labels produced at this resolution
    n_clusters = len(adata.obs[key].unique())
    print(f"Resolution {res}: {n_clusters} clusters")


In [None]:
# Visualize different resolutions
# Compute the UMAP embedding before plotting; each panel colors it by one clustering.
sc.tl.umap(adata)

# Size the grid from the number of resolutions instead of hard-coding 2x4, so that
# editing `resolutions` neither overruns the axes array nor leaves empty framed panels.
n_cols = 4
n_rows = max(1, -(-len(resolutions) // n_cols))  # ceiling division without importing math

# squeeze=False keeps `axes` two-dimensional even for a single row, so flatten() always works.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows), squeeze=False)
axes = axes.flatten()

for i, res in enumerate(resolutions):
    key = f'leiden_{res}'
    # show=False defers rendering so all panels land in the same figure
    sc.pl.umap(adata, color=key, ax=axes[i], show=False, title=f'res={res}')

# Hide every panel that did not receive a plot (not just the last one)
for unused_ax in axes[len(resolutions):]:
    unused_ax.axis('off')

plt.tight_layout()
plt.show()


In [None]:
# Settle on resolution 0.5 for downstream analysis (the value this lab treats as
# typical for PBMC data) and store the labels under the plain 'leiden' key.
sc.tl.leiden(adata, key_added='leiden', resolution=0.5)
print(f"Final clustering: {adata.obs['leiden'].nunique()} clusters")

# Cells per cluster, ordered by cluster label rather than by size
print("\nCluster sizes:", adata.obs['leiden'].value_counts().sort_index(), sep="\n")
