# Lab 4: Explore Cell–Cell Distances

## Objectives
- Compare distance metrics (Euclidean vs cosine/correlation)
- See how metric choice changes neighbor structure

## Outputs
- `../results/lab04_distance_notes.md`

---


In [None]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

from sklearn.metrics import pairwise_distances

# Load the preprocessed PBMC 3k dataset and take (up to) the first 30
# columns of its stored PCA embedding as the feature space for distances.
adata = sc.datasets.pbmc3k_processed()
X = adata.obsm['X_pca'][:, :30]

# Subsample for pairwise distance plots: an all-pairs matrix grows
# quadratically with the number of cells, so cap it at 500 cells.
# The fixed seed keeps the subsample (and the histograms) reproducible.
np.random.seed(0)
idx = np.random.choice(adata.n_obs, size=min(500, adata.n_obs), replace=False)
Xsub = X[idx]

# Square, symmetric distance matrices under each metric.
D_euc = pairwise_distances(Xsub, metric='euclidean')
D_cos = pairwise_distances(Xsub, metric='cosine')

# Histogram only the strict upper triangle (k=1) so each unordered pair is
# counted once and the zero self-distances on the diagonal are excluded.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
# NOTE: 'grey70' is an R colour name and is rejected by matplotlib with a
# ValueError; the grayscale string '0.7' is the equivalent 70% grey.
axes[0].hist(D_euc[np.triu_indices_from(D_euc, k=1)], bins=50, color='0.7')
axes[0].set_title('Euclidean distance (PCA)')
axes[1].hist(D_cos[np.triu_indices_from(D_cos, k=1)], bins=50, color='steelblue')
axes[1].set_title('Cosine distance (PCA)')
plt.tight_layout()
plt.show()

# Write the lab notes. parents=True lets the cell run even when an
# intermediate directory of the results path does not exist yet.
notes_path = Path('../results/lab04_distance_notes.md')
notes_path.parent.mkdir(parents=True, exist_ok=True)
notes_path.write_text(
    '# Lab 4 Notes\n\n'
    '- Euclidean distance uses magnitude differences; cosine emphasizes direction.\n'
    '- Choice affects kNN graph and cluster boundaries.\n'
)
print('Wrote ../results/lab04_distance_notes.md')
