# Lab 10: Reference Mapping

**Module 10** - Mapping Query Data to Reference Atlases

## Objectives
- Project new query data onto a reference atlas
- Transfer cell type labels from the reference to the query
- Assess the quality of the mapping


In [None]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt

# Plot defaults for this lab: white figure background at 100 dpi.
sc.settings.set_figure_params(facecolor="white", dpi=100)

# The reference-mapping workflow in three steps:
#   1. Build a well-annotated reference atlas.
#   2. Project the new query data onto that reference.
#   3. Transfer the reference annotations to the query cells.

# Load the reference dataset and report how many cells it contains.
reference = sc.datasets.pbmc3k_processed()
print(f"Reference: {reference.n_obs} cells")


In [None]:
# Build a demo query set by sampling cells from the reference itself.
np.random.seed(42)  # fixed seed so the same cells are drawn on every run
query_idx = np.random.choice(reference.n_obs, replace=False, size=500)
query = reference[query_idx].copy()

# Hide the annotation from the query: move 'louvain' into 'true_label' so it
# is only available later as ground truth for scoring the transferred labels.
query.obs['true_label'] = query.obs.pop('louvain')

print(f"Query: {query.n_obs} cells (labels hidden)")


In [None]:
# Simple k-NN based label transfer
from sklearn.neighbors import KNeighborsClassifier

# The query cells were sampled from `reference`, so they have to be held out
# of the training set. Otherwise each query cell is present in the training
# data with its true label (and is its own nearest neighbour), which leaks
# the answer and inflates the accuracy reported below.
held_out = reference.obs_names.isin(query.obs_names)

# Train classifier on the remaining reference cells, using the first 30 PCs
X_ref = reference.obsm['X_pca'][~held_out, :30]
y_ref = reference.obs['louvain'].loc[~held_out]

knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(X_ref, y_ref)

# Predict labels for query (same 30 PCs as used for training)
X_query = query.obsm['X_pca'][:, :30]
query.obs['transferred_label'] = knn.predict(X_query)

# Evaluate accuracy against the labels that were hidden from the query.
# NOTE: the query still shares the reference's PCA coordinates because it is
# a subset of it, so this remains an optimistic estimate compared with
# mapping a truly independent dataset.
accuracy = (query.obs['transferred_label'] == query.obs['true_label']).mean()
print(f"Label transfer accuracy: {accuracy:.1%}")
