# Lab 7: scVI Integration

**Module 7** - Deep Learning for Batch Correction

## Objectives
- Set up an scVI model
- Run integration with a variational autoencoder (VAE)
- Compare to Harmony results


In [None]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt

# scvi-tools is an optional dependency: record whether it imported so that
# the cells below can skip the scVI steps instead of raising.
try:
    import scvi
except ImportError:
    SCVI_AVAILABLE = False
    print("scvi-tools not installed. Run: pip install scvi-tools")
else:
    SCVI_AVAILABLE = True

# Figure defaults for every scanpy plot produced in this notebook.
sc.settings.set_figure_params(dpi=100, facecolor='white')

# Load the PBMC3k dataset and apply basic QC: keep cells with at least
# 200 detected genes and genes detected in at least 3 cells.
# (scVI works best with raw counts, so no normalization is applied here.)
adata = sc.datasets.pbmc3k()
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)

# Simulate batches: one label per cell, drawn at random with a fixed seed.
# The labels are assigned independently of expression, so they do not
# encode a real technical batch effect.
batch_labels = ['batch_1', 'batch_2']
np.random.seed(42)
adata.obs['batch'] = np.random.choice(batch_labels, size=adata.n_obs)

print(f"Cells: {adata.n_obs}, Genes: {adata.n_vars}")


In [None]:
if SCVI_AVAILABLE:
    # Seed scvi-tools before building the model so that weight
    # initialization, the train/validation split and minibatch order are
    # repeatable between runs. Without this, the latent space (and the
    # UMAP / clusters derived from it) can differ every time the cell runs.
    scvi.settings.seed = 42

    # Register the AnnData object with scVI: 'batch' in adata.obs is the
    # batch covariate, and layer=None means counts are read from adata.X.
    scvi.model.SCVI.setup_anndata(
        adata,
        batch_key='batch',
        layer=None  # Use X for counts
    )

    # Create the model with a 30-dimensional latent space and train for at
    # most 100 epochs, with early stopping enabled.
    model = scvi.model.SCVI(adata, n_latent=30)
    model.train(max_epochs=100, early_stopping=True)

    print("scVI training complete")


In [None]:
if SCVI_AVAILABLE:
    # Store the trained model's latent representation on the AnnData object
    # under obsm['X_scvi'].
    latent = model.get_latent_representation()
    adata.obsm['X_scvi'] = latent

    # Build the neighbor graph on the scVI embedding, then compute the UMAP
    # layout and Leiden clusters from that graph.
    sc.pp.neighbors(adata, use_rep='X_scvi')
    sc.tl.umap(adata)
    sc.tl.leiden(adata)

    # Side-by-side UMAPs: left colored by batch, right by Leiden cluster.
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    panels = (
        ('batch', 'scVI - Batch'),
        ('leiden', 'scVI - Clusters'),
    )
    for ax, (color_key, panel_title) in zip(axes, panels):
        sc.pl.umap(adata, color=color_key, ax=ax, show=False, title=panel_title)
    plt.tight_layout()
    plt.show()
