In [None]:
# --- PC sweep: neighbors + UMAP + Leiden + silhouette + per-PC UMAP plots ---



# Candidate numbers of principal components to sweep over.
# Only n_pcs=5 is active right now; extend the list (previously considered:
# 7, 8, 9, 10, 11, 12, 13, 15, 20, 25, 30) to compare more settings.
PC_GRID = [5]

# Apply matplotlib's dark theme globally: this changes rcParams, so every
# figure created after this line (including later cells) inherits it.
plt.style.use("dark_background")

def plot_umap_for_pc(adata, n, figdir):
    """Render and save the Leiden-coloured UMAP stored for one ``n_pcs`` setting.

    Reads the embedding from ``adata.obsm[f"X_umap_pc{n}"]`` and the cluster
    labels from ``adata.obs[f"leiden_pc{n}"]``, draws one scatter layer per
    cluster, writes each cluster's label at the median of its coordinates and
    saves the figure as ``umap_leiden_pc{n}.png`` inside ``figdir``.

    Parameters
    ----------
    adata
        Annotated data object holding the two per-``n`` keys named above.
    n
        Number of PCs the stored embedding/clustering was built with. Only
        used to build the key names, the title and the output file name.
    figdir
        Output directory; must support the ``/`` operator
        (e.g. a ``pathlib.Path``).

    Returns
    -------
    The path of the written PNG, i.e. ``figdir / f"umap_leiden_pc{n}.png"``.

    Raises
    ------
    KeyError
        If either per-``n`` key is missing from ``adata``.
    """
    umap_key = f"X_umap_pc{n}"
    leiden_key = f"leiden_pc{n}"
    XY = adata.obsm[umap_key]
    x, y = XY[:, 0], XY[:, 1]

    def _natural_key(label):
        # Numeric labels sort by value ("2" before "10"); anything else sorts
        # alphabetically after all numeric labels.
        text = str(label)
        return (0, int(text), "") if text.isdecimal() else (1, 0, text)

    cats = adata.obs[leiden_key].astype("category").cat.remove_unused_categories()
    # Labels stored as strings get lexicographically ordered categories
    # ("0", "1", "10", "2", ...); reorder them so legend order and colour
    # assignment follow the numeric cluster id.
    cats = cats.cat.reorder_categories(sorted(cats.cat.categories, key=_natural_key))

    # Cycle through the palette so more clusters than colours cannot raise
    # IndexError (colours repeat in that case).
    base_palette = sc.plotting.palettes.default_102

    fig, ax = plt.subplots(figsize=(9.2, 8))
    # Draw each cluster as its own layer so it gets one solid colour and its
    # own legend entry (with the cell count).
    for k, cat in enumerate(cats.cat.categories):
        idx = (cats.values == cat)
        color = base_palette[k % len(base_palette)]
        ax.scatter(x[idx], y[idx], s=1.0, c=[color], lw=0, alpha=0.9,
                   rasterized=True, label=f"{cat} (n={int(idx.sum()):,})")

    # Label each cluster at the per-axis median of its UMAP coordinates
    # (a robust centre; not a true medoid).
    med = (pd.DataFrame({"x": x, "y": y, "c": cats.values})
           .groupby("c", observed=True)[["x", "y"]].median())
    for lab, (cx, cy) in med.iterrows():
        ax.text(cx, cy, str(lab), ha="center", va="center",
                fontsize=14, fontweight="bold", color="white",
                path_effects=[mpl.patheffects.withStroke(linewidth=3, foreground="#111")])

    ax.set_xlabel("UMAP1", color="white")
    ax.set_ylabel("UMAP2", color="white")
    ax.set_aspect("equal", "box")
    ax.set_title(f"UMAP — Leiden (n_pcs={n})", fontsize=16, weight="bold")

    # Legend sits outside the axes, to the right; markerscale enlarges the
    # 1-pt scatter markers so the legend swatches are visible.
    lines, labels = ax.get_legend_handles_labels()
    leg = ax.legend(lines, labels, title="Clusters",
                    loc="center left", bbox_to_anchor=(1.02, 0.5),
                    frameon=True, fontsize=9, markerscale=6/1.0)
    leg.get_frame().set_facecolor("black")
    leg.get_frame().set_edgecolor("white")
    leg.get_title().set_color("white")
    for t in leg.get_texts():
        t.set_color("white")

    fig.tight_layout()
    out = figdir / f"umap_leiden_pc{n}.png"
    # bbox_inches="tight" keeps the outside legend inside the saved image.
    fig.savefig(out, dpi=500, bbox_inches="tight")
    # Close explicitly so figures do not pile up when called in a loop.
    plt.close(fig)
    return out

# Per-n_pcs results of the sweep: silhouette score and path of the saved UMAP.
scores = {}
umap_files = {}

# NOTE: every iteration overwrites the default neighbor graph,
# adata.obsm["X_umap"] and adata.obs["leiden"]; after the loop they hold the
# result for the LAST value in PC_GRID. Per-setting copies are kept under the
# "X_umap_pc{n}" / "leiden_pc{n}" keys written below.
for n in PC_GRID:
    # build graph, embedding and clustering for this n (fixed seeds)
    sc.pp.neighbors(adata, n_neighbors=15, n_pcs=n, metric="euclidean")
    sc.tl.umap(adata, min_dist=0.30, spread=1.0, random_state=0)
    sc.tl.leiden(adata, resolution=0.8, flavor="igraph",
                 n_iterations=2, directed=False, random_state=0)

    # store to unique keys so we can plot later without overwriting
    adata.obsm[f"X_umap_pc{n}"] = adata.obsm["X_umap"].copy()
    adata.obs[f"leiden_pc{n}"] = adata.obs["leiden"].astype(str).values

    # silhouette in the *same space used to build neighbors* (the first n PCs)
    labels_int = adata.obs[f"leiden_pc{n}"].astype(int).values
    if np.unique(labels_int).size < 2:
        # silhouette_score raises ValueError with fewer than two clusters;
        # record NaN instead so one degenerate setting does not abort the sweep.
        sil = float("nan")
    else:
        sil = silhouette_score(adata.obsm["X_pca"][:, :n], labels_int)
    scores[n] = sil
    print(f"n_pcs={n:>2}: silhouette = {sil:.3f}")

    # per-PC UMAP figure
    umap_files[n] = plot_umap_for_pc(adata, n, FIGS)

# pick the winner among settings with a defined score & show summary
valid_scores = {k: v for k, v in scores.items() if np.isfinite(v)}
if not valid_scores:
    raise RuntimeError(
        "PC sweep produced no valid silhouette score; "
        "check PC_GRID and the clustering resolution."
    )
best_n = max(valid_scores, key=valid_scores.get)
print(f"\nBest silhouette: n_pcs={best_n}  (score={scores[best_n]:.3f})")

# --- summary line plot of silhouette vs n_pcs ---
fig, ax = plt.subplots(figsize=(8.5, 5.5))
tested = sorted(scores)                      # x axis in increasing n_pcs
xs = np.array(tested)
ys = np.array([scores[k] for k in tested])   # NaN entries show as gaps
ax.plot(xs, ys, marker="o")
ax.set_xlabel("Number of PCs", color="white")
ax.set_ylabel("Silhouette score", color="white")
ax.set_title("Silhouette vs. n_pcs", fontsize=15, weight="bold")
ax.grid(True, alpha=0.25)
# mark best
ax.scatter([best_n], [scores[best_n]], s=120, edgecolor="black", c="gold", zorder=3)
ax.annotate(f"best = {best_n}\n{scores[best_n]:.3f}",
            xy=(best_n, scores[best_n]),
            xytext=(10, 10), textcoords="offset points",
            fontsize=11, weight="bold",
            bbox=dict(boxstyle="round,pad=0.2", fc="#111", ec="white", alpha=0.8))
fig.tight_layout()
fig.savefig(FIGS / "silhouette_vs_pcs.png", dpi=500, bbox_inches="tight")
plt.close(fig)


We compute Wilcoxon markers and **neighborhood enrichment** (Squidpy) to assess micro-territories.


In [None]:
# Figure 13 — Heatmap of the top 4 ranked marker genes per cluster.
# Uses the rank_genes_groups result already stored on `adata`.
fig = sc.pl.rank_genes_groups_heatmap(
    adata,
    n_genes=4,
    use_raw=False,
    swap_axes=True,         # genes as rows: often easier to read
    dendrogram=False,
    standard_scale="var",   # rescale each gene to the 0–1 range
    show=False,             # keep the figure open so it can be saved below
)
# scanpy leaves the heatmap as the current figure; save and close that one.
heatmap_path = FIGS / "13_markers_heatmap.png"
plt.gcf().savefig(heatmap_path, dpi=220, bbox_inches="tight")
plt.close()


In [None]:
# Spatial neighbors + neighborhood enrichment (Squidpy)

In [None]:

# Build the spatial graph from the coordinates in adata.obsm['spatial']
# (assumed to be in microns — TODO confirm for this dataset).
# With coord_type="generic" and a float radius, the graph is built from the
# neighborhood radius. ~25–35 µm is a reasonable starting range for Xenium
# cell neighborhoods; tweak if the graph is too sparse/dense.
sq.gr.spatial_neighbors(
    adata,
    coord_type="generic",   # coordinates are plain x/y (not grid indices)
    spatial_key="spatial",
    radius=30.0,            # try 25–35
    n_rings=1,              # default; squidpy documents n_rings as a grid-only option
    set_diag=False          # no self-loops
)

# Neighborhood enrichment is a permutation test (n_perms must be > 0).
# 100 permutations keeps this quick; raise to 1000 for publication figures.
# The seed is fixed so the z-scores are reproducible across runs.
sq.gr.nhood_enrichment(
    adata,
    cluster_key="leiden",
    n_perms=100,
    seed=0,
    show_progress_bar=False
)


In [None]:
# Figure 14 — Neighborhood enrichment heatmap
# Each cell of the matrix shows how over- or under-represented one cluster is
# in the spatial neighborhood of another.
fig = sq.pl.nhood_enrichment(
    adata,
    cluster_key="leiden",
    cmap="coolwarm",
    vmin=-5, vmax=5,   # clip extremes for readability
    show=False,
)
# The plot is left as the current figure; save and close that one.
nhood_path = FIGS / "14_nhood_enrichment.png"
plt.gcf().savefig(nhood_path, dpi=220, bbox_inches="tight")
plt.close()

In [None]:
# Figure 15 — Cluster size barplot
# Number of cells assigned to each Leiden cluster, in cluster order.
counts = adata.obs["leiden"].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(6, 3))
counts.plot(kind="bar", ax=ax)
ax.set_ylabel("cells")
ax.set_title("Cluster sizes (Leiden)")
fig.tight_layout()
fig.savefig(FIGS / "15_cluster_sizes.png", dpi=220)
plt.close(fig)

In [None]:
# Figure 16 — Top marker per cluster (dotplot subset)
# Take the `top` highest-scoring genes of every cluster from the stored
# rank_genes_groups result and show them in one compact dotplot.
top = 3
marker_df = sc.get.rank_genes_groups_df(adata, group=None)
top_markers = (
    marker_df
    # Rank by test score, highest first. Ranking by pvals_adj would be wrong
    # here: the p-values are two-sided, so strongly *depleted* genes have tiny
    # p-values too and could be picked as "markers".
    .sort_values(["group", "scores"], ascending=[True, False])
    # observed=True: only groups actually present (avoids the pandas warning
    # for categorical group keys).
    .groupby("group", observed=True)
    .head(top)["names"]
    # A gene can be a top marker of several clusters; plot it once.
    .unique()
    .tolist()
)

fig = sc.pl.dotplot(
    adata,
    var_names=top_markers,
    groupby="leiden",
    show=False,
    standard_scale="var"   # rescale each gene to the 0–1 range
)
plt.gcf().savefig(FIGS / "16_top_marker_dotplot.png", dpi=220, bbox_inches="tight")
plt.close()

In [None]:
# Figure 18 — Neighborhood graph degree (how many neighbors each cell has)
# Sanity check that the chosen radius gives a graph that is neither too
# sparse nor too dense.
A = adata.obsp["spatial_connectivities"]  # spatial adjacency matrix
# Row sums of the adjacency matrix = per-cell neighbor counts, flattened to 1-D.
deg = np.asarray(A.sum(axis=1)).ravel()

fig, ax = plt.subplots(figsize=(5, 3))
ax.hist(deg, bins=50)
ax.set_xlabel("neighbors per cell")
ax.set_ylabel("cells")
ax.set_title("Spatial graph degree")
fig.tight_layout()
fig.savefig(FIGS / "18_neighbor_degree_hist.png", dpi=220)
plt.close(fig)