# SCS Jupyter notebook demo

This notebook demonstrates how to integrate SCS into a scanpy-driven analysis workflow.

In [None]:
# Create a temporary directory to store the SCS inputs.
# The TemporaryDirectory handle is kept in `temp_dir` so that later cells can
# pass `temp_dir.name` to the SCS tooling and remove the directory explicitly
# with `temp_dir.cleanup()` at the end of the notebook.

import tempfile

temp_dir = tempfile.TemporaryDirectory()

In [None]:
# generate anndata object from GSE entry GSE258835

import tarfile
from io import BytesIO
from urllib.request import urlopen

import anndata as ad
import numpy as np
import pandas as pd
import scanpy as sc

# Download the GEO supplementary archive for GSE258835 fully into memory.
# Buffering in a BytesIO gives tarfile a seekable file object.
with urlopen(
    "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE258835&format=file"
) as f:
    archive_bytes = BytesIO(f.read())

# Read each sample: every member of the outer archive is itself a tar archive
# that holds a `counts.csv` and a `centroids.csv`.
adatas = []
# `with` closes the outer and inner TarFile objects as soon as their contents
# have been parsed, instead of leaving them open for the kernel's lifetime.
with tarfile.open(fileobj=archive_bytes) as tar:
    for sample in tar.getmembers():
        # extractfile() returns None for non-file members (e.g. directories),
        # so skip anything that is not a regular file.
        if not sample.isfile():
            continue

        with tarfile.open(
            fileobj=BytesIO(tar.extractfile(sample).read())
        ) as sample_tar:
            counts = pd.read_csv(
                BytesIO(sample_tar.extractfile("./counts.csv").read()), index_col=0
            )
            # "cell" is read as str so the IDs compare equal to the string
            # obs_names of the AnnData object built below.
            centroids = pd.read_csv(
                BytesIO(sample_tar.extractfile("./centroids.csv").read()),
                index_col=0,
                dtype={"cell": str},
            )

        # Transpose so that the CSV's columns become observations (cells).
        adata = ad.AnnData(counts.T)
        # Align the centroids to the observation order; cells without a
        # centroid entry end up as NaN rows after the reindex.
        adata.obsm["spatial"] = (
            centroids.set_index("cell").reindex(adata.obs_names).values
        )
        adatas.append(adata)

# Lay the samples out on a 3-column grid so that they do not overlap when
# plotted together. The grid cell size is the largest per-sample extent along
# each of the two spatial axes.
extents = np.array(
    [
        sample.obsm["spatial"].max(axis=0) - sample.obsm["spatial"].min(axis=0)
        for sample in adatas
    ]
)
max_width, max_height = extents.max(axis=0)

for idx, adata in enumerate(adatas):
    grid_col = idx % 3
    grid_row = idx // 3
    # 1.1 leaves a 10% gap between neighbouring samples.
    grid_offset = np.array(
        [grid_col * max_width * 1.1, grid_row * max_height * 1.1]
    )
    # Centre each sample on its own mean before moving it to its grid slot.
    centered = adata.obsm["spatial"] - adata.obsm["spatial"].mean(axis=0)
    adata.obsm["spatial"] = centered + grid_offset

# Merge all samples into one AnnData object and drop cells whose total count
# across all variables is zero.
adata = ad.concat(adatas)
has_counts = adata.X.sum(axis=1) != 0
adata = adata[has_counts].copy()

# Stash the raw counts in a layer first: the normalization and log1p steps
# below overwrite adata.X.
adata.layers["counts"] = adata.X.copy()
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

In [None]:
# generate SCS input directory

from pathlib import Path

from from_anndata import from_anndata

# Write the SCS inputs into the temporary directory created at the top of the
# notebook. "spatial" presumably names the obsm entry holding the coordinates
# (TODO confirm against from_anndata).
scs_input_dir = Path(temp_dir.name)

print("generating scs input directory (can take a couple minutes)")
from_anndata(adata, scs_input_dir, "spatial")
print("finished scs input directory")

In [None]:
# run SCS and integrate into anndata object

from io import BytesIO
from subprocess import check_output

import pandas as pd

# Run SCS via cargo on the generated input directory and capture its stdout as
# bytes. check_output blocks until the process exits (i.e. until the visual
# cell selection is finished) and raises CalledProcessError on a non-zero exit.
scs_command = ["cargo", "-q", "run", "--release", temp_dir.name]
scs_out = check_output(scs_command)

# Parse the tab-separated SCS output. "cell" is read as str and then becomes
# the index, so that rows can be matched against the AnnData observations.
scs_out_df = pd.read_csv(BytesIO(scs_out), sep="\t", dtype={"cell": str})
scs_out_df = scs_out_df.set_index("cell")

# Save the selection as a metadata column, aligned on the cell index.
# NOTE(review): assigning a DataFrame to a single column presumably relies on
# the SCS output having exactly one column besides "cell" -- confirm.
adata.obs["scs_selection"] = scs_out_df

In [None]:
# Demonstrate the difference between the full and the subsetted data.
print(f"all cells: {adata}")

# Build the subset outside the f-string: reusing double quotes inside a
# double-quoted f-string is a SyntaxError on Python versions before 3.12.
# NOTE(review): boolean indexing assumes "scs_selection" is a boolean column
# with no missing values -- confirm against the SCS output.
selected_cells = adata[adata.obs["scs_selection"]]
print(f"subsetted to only selected cells: {selected_cells}")

In [None]:
# Remove the temporary directory, together with everything written into it,
# once finished. After this call `temp_dir.name` no longer exists on disk, so
# the SCS cells above cannot be re-run without recreating `temp_dir` first.
temp_dir.cleanup()