# Run all — Download → Convert metadata → Build h5ad → Make figures

This notebook runs the full reproducible pipeline end-to-end.

If you prefer stepwise runs, use the numbered notebooks in this folder instead.

In [None]:
from pathlib import Path
import sys, subprocess, os

# --- resolve the repository root (the directory that contains ./scripts) ---
# The working directory is tried first, then its parent, so the notebook works
# whether Jupyter was started from the repo root or from the notebooks/ folder.
start_dir = Path.cwd()
candidates = [start_dir, start_dir.parent]
# Fall back to the working directory when neither candidate qualifies; the
# check below then raises with an actionable message.
ROOT = next((c for c in candidates if (c / "scripts").exists()), start_dir)
if not (ROOT / "scripts").exists():
    raise RuntimeError("Could not find repo root (missing ./scripts). Start Jupyter from the repo root or the notebooks/ folder.")
print("Repo root:", ROOT)

## 1) Download GEO supplementary files

In [None]:
# Destination directory handed to the download script; created up front
# (including missing parents) and left untouched if it already exists.
outdir = ROOT / "data" / "stamp_fig1_samples"
outdir.mkdir(parents=True, exist_ok=True)

# Run the download script with the same interpreter as this kernel.
download_cmd = [
    sys.executable,
    str(ROOT / "scripts" / "00_download_geo.py"),
    "--outdir", str(outdir),
]
# check=True raises CalledProcessError if the script exits with a non-zero status.
subprocess.run(download_cmd, check=True)

## 2) Convert `.qs.gz` → `*_metadata.csv` (requires R)

In [None]:
# Run the metadata-preparation shell script through bash.
# The script is given no arguments, so any relative path it uses resolves
# against the working directory. Pin the working directory to the repo root so
# this step behaves the same whether Jupyter was started from the repo root or
# from the notebooks/ folder (both are supported by the root lookup).
# check=True raises CalledProcessError if the script exits with a non-zero status.
subprocess.run(
    ["bash", str(ROOT / "scripts" / "01_prepare_metadata.sh")],
    cwd=str(ROOT),
    check=True,
)

## 3) Build analyzed AnnData (`fig1_scanvi_analyzed.h5ad`)

In [None]:
# Output directory for the reference-mapping step; created (with parents) if
# missing, reused if already present.
out_sc = ROOT / "stamp_fig1_scanvi_outputs"
out_sc.mkdir(parents=True, exist_ok=True)

# Input directory and script path, both anchored at the repo root.
mapping_script = ROOT / "scripts" / "02_scanvi_reference_mapping.py"
samples_dir = ROOT / "data" / "stamp_fig1_samples"

# Run with the kernel's interpreter; check=True raises CalledProcessError
# if the script exits with a non-zero status.
subprocess.run(
    [
        sys.executable,
        str(mapping_script),
        "--data_dir", str(samples_dir),
        "--outdir", str(out_sc),
    ],
    check=True,
)

## 4) Generate Figure 2

In [None]:
# Paths for the Figure 2 step, all anchored at the repo root.
fig2_script = ROOT / "scripts" / "03_make_figure2.py"
fig2_h5ad = ROOT / "stamp_fig1_scanvi_outputs" / "fig1_scanvi_analyzed.h5ad"
fig2_outdir = ROOT / "figures" / "Figure2"

# Run with the kernel's interpreter; check=True raises CalledProcessError
# if the script exits with a non-zero status.
subprocess.run(
    [
        sys.executable,
        str(fig2_script),
        "--h5ad", str(fig2_h5ad),
        "--outdir", str(fig2_outdir),
    ],
    check=True,
)

## 5) Generate Figure 3

In [None]:
# Paths for the Figure 3 step, all anchored at the repo root.
fig3_script = ROOT / "scripts" / "04_make_figure3.py"
fig3_h5ad = ROOT / "stamp_fig1_scanvi_outputs" / "fig1_scanvi_analyzed.h5ad"
fig3_outdir = ROOT / "figures" / "Figure3"

# Run with the kernel's interpreter; check=True raises CalledProcessError
# if the script exits with a non-zero status.
subprocess.run(
    [
        sys.executable,
        str(fig3_script),
        "--h5ad", str(fig3_h5ad),
        "--outdir", str(fig3_outdir),
    ],
    check=True,
)

Done. Figures are in `figures/Figure2/` and `figures/Figure3/`.