# InverseSC Quickstart

This notebook demonstrates the basic usage of InverseSC for single-cell RNA-seq analysis.

In [None]:
import numpy as np
import scanpy as sc
import inverse_sc as isc
import matplotlib.pyplot as plt

## 1. Generate Synthetic Data

For this example, we'll use synthetic data where we know the ground truth.

In [None]:
# Simulate a scRNA-seq dataset; the second return value is the ground truth
# that the validation cells further down compare against.
synthetic_params = dict(
    n_cells=1000,
    n_genes=2000,
    n_programs=5,
    seed=42,
)
adata, ground_truth = isc.validation.generate_synthetic_data(**synthetic_params)

print(f"Data shape: {adata.shape}")

# Fraction of entries in the expression matrix that are exactly zero.
zero_fraction = (adata.X == 0).mean()
print(f"Dropout rate: {zero_fraction:.2%}")

## 2. Standard Scanpy Pipeline (for comparison)

In [None]:
# Baseline: a conventional Scanpy workflow, used as the comparison point.
# NOTE(review): none of these calls reassigns `adata`, so their results are
# presumably written back into it; later cells then see the filtered,
# normalized, log-transformed object. Confirm the InverseSC steps expect that.
MIN_CELLS_PER_GENE = 3
NORMALIZATION_TARGET = 1e4
N_HIGHLY_VARIABLE = 1000

# Gene filtering, depth normalization, log transform, variable-gene selection
sc.pp.filter_genes(adata, min_cells=MIN_CELLS_PER_GENE)
sc.pp.normalize_total(adata, target_sum=NORMALIZATION_TARGET)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=N_HIGHLY_VARIABLE)

# Dimensionality reduction, neighbor graph, 2-D embedding, clustering
sc.tl.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
sc.tl.leiden(adata)

In [None]:
# UMAP from the standard pipeline, coloured by its Leiden clusters
sc.pl.umap(
    adata,
    color='leiden',
    title='Standard Pipeline',
)

## 3. InverseSC: Fit Inverse Model

Now let's apply the inverse problem framework.

In [None]:
# Train the InverseSC inverse model on `adata`.
# NOTE(review): the standard-pipeline cell has already run filter_genes,
# normalize_total and log1p on this same `adata`; confirm fit_inverse_model
# is meant to receive that rather than the raw synthetic counts.
# NOTE(review): no seed is passed here; confirm whether training is
# reproducible across runs.
inverse_model_config = dict(
    n_latent=30,
    n_programs=5,  # same value that was passed to generate_synthetic_data
    n_epochs=100,
    batch_size=256,
)
isc.pp.fit_inverse_model(adata, **inverse_model_config)

## 4. Analyze Results

The inferred true expression is now in `adata.obsm['Z_true_mean']`.

In [None]:
# Rebuild the neighbor graph, clustering and embedding from the inferred
# expression instead of the PCA representation.
# NOTE(review): neighbors/umap are called without a distinguishing storage
# key, exactly as in the standard pipeline, so they presumably replace its
# graph and UMAP coordinates. Confirm that losing the baseline embedding
# is intended.
INVERSE_REP = 'Z_true_mean'
INVERSE_CLUSTERS = 'leiden_inverse'

sc.pp.neighbors(adata, use_rep=INVERSE_REP)
sc.tl.leiden(adata, key_added=INVERSE_CLUSTERS)
sc.tl.umap(adata)

# Plot the new embedding coloured by the new cluster labels
sc.pl.umap(adata, color=INVERSE_CLUSTERS, title='Inverse Problem')

## 5. Uncertainty Quantification

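Unlike the standard pipeline above, InverseSC also provides uncertainty estimates. Here we use them to compute a confidence score for the `leiden_inverse` clustering and colour the UMAP by it.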

In [None]:
# Score confidence for the clustering stored under 'leiden_inverse'
isc.tl.cluster_uncertainty(
    adata,
    cluster_key='leiden_inverse',
)

# Colour the UMAP by that score; presumably 'cluster_confidence' is written
# into `adata` by cluster_uncertainty above (TODO confirm)
sc.pl.umap(
    adata,
    color='cluster_confidence',
    cmap='RdYlGn',
)

## 6. Compare to Ground Truth

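Because the data are synthetic, we know the ground truth, so we can benchmark the results against Scanpy and check how well calibrated the uncertainty estimates are.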

In [None]:
# Benchmark against Scanpy, using the synthetic ground truth as reference
results = isc.validation.benchmark_against_scanpy(
    adata,
    ground_truth,
)
print(results)

In [None]:
# Evaluate the uncertainty estimates against the synthetic ground truth
calibration = isc.validation.uncertainty_calibration(
    adata,
    ground_truth,
)

# One line per metric, rounded to three decimals.
# assumes every value accepts the `.3f` format spec (TODO confirm)
print("Calibration metrics:")
for metric_name, metric_value in calibration.items():
    print(f"  {metric_name}: {metric_value:.3f}")

## 7. Program Interpretation

In [None]:
# Summarise the learned programs, requesting 20 top genes for each
program_info = isc.tl.interpret_programs(adata, top_genes=20)

# Print the first 10 rows of every program's summary
for program_id, program_summary in program_info.items():
    print(f"\nProgram {program_id}:")
    print(program_summary.head(10))