In [None]:
from pathlib import Path
import scanpy as sc
import numpy as np

# Scanpy session defaults: logging verbosity level 2 and white-background
# figures rendered at 120 dpi.
sc.settings.verbosity = 2
sc.settings.set_figure_params(dpi=120, facecolor="white")

# Read the prepared AnnData object from disk; the bare name on the last line
# makes the notebook display its summary.
prepped_h5ad = Path("../data/processed/visium_breast_cancer_blockA_section1_prepped.h5ad")
adata = sc.read_h5ad(prepped_h5ad)
adata


In [None]:
# The clustering and embedding cells below operate on a precomputed neighbor
# graph. Fail early, with an actionable message, if the loaded file lacks it.
assert "connectivities" in adata.obsp and "distances" in adata.obsp, (
    "Neighbor graph not found in adata.obsp (expected 'connectivities' and "
    "'distances'); run sc.pp.neighbors(adata) in the preprocessing step first."
)


In [None]:
# Leiden clustering at multiple resolutions.
# Each run is written under its own key: "leiden_r" followed by the resolution
# with the decimal point replaced by an underscore (e.g. 0.5 -> "leiden_r0_5").
for resolution in (0.3, 0.5, 0.8, 1.0):
    obs_key = "leiden_r" + str(resolution).replace(".", "_")
    sc.tl.leiden(adata, resolution=resolution, key_added=obs_key)


In [None]:
# UMAP embedding, then one panel per Leiden resolution (2-column grid) so the
# clusterings can be compared side by side.
sc.tl.umap(adata)

sc.pl.umap(
    adata,
    color=["leiden_r" + str(res).replace(".", "_") for res in (0.3, 0.5, 0.8, 1.0)],
    ncols=2,
    wspace=0.4,
)


In [None]:
# Spatial view of clusters (resolution 0.5)

cluster_key = "leiden_r0_5"

# Colour each spot by its cluster label, drawn over the "hires" tissue image.
sc.pl.spatial(adata, color=cluster_key, img_key="hires", size=1.2)


In [None]:
# Spatial view of clusters (resolution 0.8)

# NOTE: `cluster_key` keeps this value afterwards; the marker-gene, per-cluster
# mean and domain-label cells further down all read it.
cluster_key = "leiden_r0_8"

# Colour each spot by its cluster label, drawn over the "hires" tissue image.
sc.pl.spatial(adata, color=cluster_key, img_key="hires", size=1.2)

In [None]:
# Marker genes
# Rank genes for every group of `cluster_key` with the Wilcoxon method, reading
# expression values from `adata.raw` (use_raw=True).
# NOTE(review): this presumes `adata.raw` was populated during preprocessing —
# confirm against the notebook that produced the input file.
sc.tl.rank_genes_groups(adata, groupby=cluster_key, method="wilcoxon", use_raw=True)

# Show the 10 top-ranked genes per group; each panel gets its own y-axis.
sc.pl.rank_genes_groups(adata, n_genes=10, sharey=False)


In [None]:
import pandas as pd

# Collect the ranking results for all groups (group=None) into one table.
markers = sc.get.rank_genes_groups_df(adata, group=None)

# Create the output folder first: to_csv does not create missing directories
# and would otherwise fail on a fresh checkout without ../results.
results_dir = Path("../results")
results_dir.mkdir(parents=True, exist_ok=True)

# The file name carries the clustering key so exports for different
# resolutions do not overwrite each other.
markers.to_csv(results_dir / f"markers_{cluster_key}.csv", index=False)
markers.head()


In [None]:
# Domain annotation
# Marker-gene panels keyed by compartment name. The key doubles as the suffix
# of the score column written below ("score_<name>").
gene_sets = {
    "immune": ["PTPRC","CD74","HLA-DRA","MS4A1","CD3D","CD3E","NKG7","LYZ","IGHG1","IGKC"],
    "epithelial": ["EPCAM","KRT8","KRT18","KRT19","MUC1","EHF"],
    "stromal": ["COL1A1","COL1A2","LUM","DCN","COL3A1","TAGLN","ACTA2","RGS5"],
    "endothelial": ["PECAM1","VWF","KDR","EMCN"],
}

# Score every panel in one loop so that adding a gene set only requires a new
# dict entry. Produces score_immune, score_epithelial, score_stromal and
# score_endothelial, computed from adata.raw (use_raw=True).
for domain_name, domain_genes in gene_sets.items():
    sc.tl.score_genes(
        adata,
        gene_list=domain_genes,
        score_name=f"score_{domain_name}",
        use_raw=True,
    )


In [None]:
# One spatial panel per gene-set score, laid out in a 2-column grid over the
# "hires" tissue image.
sc.pl.spatial(
    adata,
    img_key="hires",
    color=["score_immune", "score_epithelial", "score_stromal", "score_endothelial"],
    ncols=2,
    wspace=0.3,
    size=1.2,
)


In [None]:
#Per Cluster Mean Score

import pandas as pd

# Mean of each gene-set score within every cluster of `cluster_key`.
# `observed=True` is passed explicitly: the grouping column is categorical, and
# leaving it out triggers a pandas FutureWarning because the default is changing.
# With it, only clusters that actually occur in the data get a row.
cluster_means = (
    adata.obs
    .groupby(cluster_key, observed=True)[
        ["score_immune", "score_epithelial", "score_stromal", "score_endothelial"]
    ]
    .mean()
    .sort_index()
)
cluster_means


In [None]:
# Final domain label column
# Every cluster of `cluster_key` currently maps to the placeholder "unknown".
# NOTE(review): presumably this mapping is meant to be filled in by hand after
# inspecting the per-cluster scores — confirm before relying on `domain`.
cluster_to_domain = dict.fromkeys(adata.obs[cluster_key].cat.categories, "unknown")
adata.obs["domain"] = (
    adata.obs[cluster_key]
    .map(cluster_to_domain)
    .astype("category")
)


In [None]:
# Write an intermediate snapshot of the annotated object into the
# processed-data folder.
OUT = Path("../data/processed")
adata.write_h5ad(OUT.joinpath("visium_breast_cancer_blockA_section1_domains_tmp.h5ad"))


In [None]:
from pathlib import Path
# Ensure the output folder is present (no-op when it already exists), then
# persist the clustered object and report completion.
processed_dir = Path("../data/processed")
processed_dir.mkdir(exist_ok=True)

adata.write_h5ad("../data/processed/visium_breast_cancer_blockA_section1_clustered.h5ad")
print("saved clustered h5ad")

