Process Wu et al. slides

In [1]:
import json
import os
from pathlib import Path

import numpy as np
import pandas as pd
import scanpy as sc
import squidpy as sq
from squidpy._constants._pkg_constants import Key
from squidpy.read._utils import _load_image

In [2]:
# Identifiers of the slides to process; each is used below as both a
# sub-folder name under the data directory and a file-name stem.
slide_names = [
    "1142243F",
    "1160920F",
    "CID4290",
    "CID4465",
    "CID44971",
    "CID4535",
]

In [3]:
# Data root, expressed relative to the notebook's working directory
# (two levels up, then data/wu_et_al).
data_dir = os.path.join(os.pardir, os.pardir, "data", "wu_et_al")

Load Wu et al. breast cancer data

In [4]:
def _read_tissue_positions(spatial_dir, index_dtype):
    """Read the tissue-positions table found in ``spatial_dir``.

    ``tissue_positions.csv`` is preferred and is read with its first line as a
    header; otherwise ``tissue_positions_list.csv`` is read with no header.

    Parameters
    ----------
    spatial_dir : pathlib.Path
        Folder that contains the tissue-positions CSV.
    index_dtype : dtype
        dtype the index is cast to so it joins cleanly onto ``adata.obs``.

    Returns
    -------
    pandas.DataFrame
        Indexed by the first CSV column, with the five columns relabelled
        positionally as listed below.
    """
    positions_file = spatial_dir / "tissue_positions.csv"
    has_header = positions_file.exists()
    if not has_header:
        positions_file = spatial_dir / "tissue_positions_list.csv"

    # header=0 consumes exactly the header line. The earlier header=1 told
    # pandas the *second* line was the header, so the real header was skipped
    # and the first spot was swallowed as column labels (then overwritten
    # below), silently losing that spot's coordinates.
    coords = pd.read_csv(
        positions_file,
        header=0 if has_header else None,
        index_col=0,
    )
    # Labels are assigned by position, not taken from the file.
    # NOTE(review): "pxl_col" is placed before "pxl_row" here; this mirrors the
    # original code and appears intentional so that selecting [row, col] later
    # yields (x, y) -- confirm against the Space Ranger column order.
    coords.columns = ["in_tissue", "array_row", "array_col", "pxl_col_in_fullres", "pxl_row_in_fullres"]
    # https://github.com/scverse/squidpy/issues/657
    coords.index = coords.index.astype(index_dtype)
    return coords


for slide_name in slide_names:
    slide_dir = Path(data_dir) / slide_name
    # On-disk folder name; the original mixed Key.uns.spatial and the literal
    # "spatial" for this same directory.
    spatial_dir = slide_dir / "spatial"
    library_id = slide_name

    adata = sc.read_h5ad(slide_dir / f"{slide_name}.h5ad")
    del adata.layers["logcounts"]

    # Attach images and scale factors under uns[spatial][library_id].
    adata.uns[Key.uns.spatial] = {
        library_id: {
            Key.uns.image_key: {
                res: _load_image(spatial_dir / f"tissue_{res}_image.png")
                for res in ["hires", "lowres"]
            },
            "scalefactors": json.loads((spatial_dir / "scalefactors_json.json").read_bytes()),
        }
    }

    # Join spot positions onto obs, move pixel coordinates into obsm, and keep
    # the remaining position columns in obs.
    coords = _read_tissue_positions(spatial_dir, adata.obs.index.dtype)
    adata.obs = pd.merge(adata.obs, coords, how="left", left_index=True, right_index=True)
    pixel_columns = ["pxl_row_in_fullres", "pxl_col_in_fullres"]
    adata.obsm[Key.obsm.spatial] = adata.obs[pixel_columns].values
    adata.obs = adata.obs.drop(columns=pixel_columns)

    # Left join (like the coordinate merge above) keeps one row per
    # observation; the default inner join could return fewer rows than adata
    # has observations, which cannot be assigned back to adata.obs.
    metadata = pd.read_csv(slide_dir / "metadata.csv", index_col=0)
    adata.obs = adata.obs.merge(metadata, how="left", left_index=True, right_index=True)

    # Written next to the slide folders, so the input file is left untouched.
    adata.write_h5ad(Path(data_dir) / f"{slide_name}.h5ad")