# Lab 5: Harmony Integration

**Module 5** - Fast Batch Correction with Harmony

## Objectives
- Run Harmony on multi-batch data
- Understand Harmony parameters
- Evaluate integration quality


In [None]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt

# harmonypy is treated as an optional dependency: probe for it once and record
# the outcome in HARMONY_AVAILABLE, which the Harmony cells further down check
# before doing any work.
try:
    import harmonypy
    HARMONY_AVAILABLE = True
except ImportError:
    HARMONY_AVAILABLE = False
    # Print an install hint rather than raising, so the rest of the setup
    # cell still runs without harmonypy.
    print("harmonypy not installed. Run: pip install harmonypy")

# Notebook-wide plot defaults: 100 dpi figures on a white background.
sc.settings.set_figure_params(facecolor='white', dpi=100)

# Start from scanpy's preprocessed PBMC 3k example dataset.
adata = sc.datasets.pbmc3k_processed()

# Simulate a batch column: each cell gets one of three labels drawn at random.
# The global NumPy seed is fixed so reruns produce the same assignment.
# NOTE(review): the labels are random per cell, so they are presumably
# unrelated to expression -- do not expect a strong batch effect to correct.
batch_labels = ['batch_1', 'batch_2', 'batch_3']
np.random.seed(42)
adata.obs['batch'] = np.random.choice(batch_labels, size=adata.n_obs)

# Report how many cells landed in each simulated batch.
batch_sizes = adata.obs['batch'].value_counts().to_dict()
print(f"Batches: {batch_sizes}")


In [None]:
if HARMONY_AVAILABLE:
    # Harmony adjusts an existing PCA embedding: it takes the coordinates
    # named by `basis`, corrects them with respect to the `key` column, and
    # stores the result under `adjusted_basis` (read back from obsm below).
    harmony_options = {
        'key': 'batch',
        'basis': 'X_pca',
        'adjusted_basis': 'X_pca_harmony',
    }
    sc.external.pp.harmony_integrate(adata, **harmony_options)
    print("Harmony integration complete")

    # Sanity check: show the shape of the corrected embedding.
    corrected_embedding = adata.obsm['X_pca_harmony']
    print(f"New embedding: {corrected_embedding.shape}")


In [None]:
if HARMONY_AVAILABLE:
    # "After" view: neighbour graph and UMAP built from the Harmony-corrected
    # embedding, stored on the main object.
    sc.pp.neighbors(adata, use_rep='X_pca_harmony')
    sc.tl.umap(adata)

    # "Before" view: the same two steps on the original PCA. They run on a
    # copy so the corrected graph/UMAP on `adata` are not overwritten.
    adata_orig = adata.copy()
    sc.pp.neighbors(adata_orig, use_rep='X_pca')
    sc.tl.umap(adata_orig)

    # Side-by-side panels, both coloured by batch: left = before, right = after.
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    panels = [
        (adata_orig, 'Before Harmony'),
        (adata, 'After Harmony'),
    ]
    for panel_ax, (panel_data, panel_title) in zip(axes, panels):
        # show=False defers rendering until both panels are drawn.
        sc.pl.umap(panel_data, color='batch', ax=panel_ax, show=False, title=panel_title)

    plt.tight_layout()
    plt.show()
