![imaging3/4](https://img.shields.io/badge/imaging3/4-lightgrey)
[![Jupyter Notebook](https://img.shields.io/badge/Source%20on%20GitHub-orange)](https://github.com/laminlabs/lamin-usecases/blob/main/docs/imaging3.ipynb)

# Featurize single-cell images

Here, we use [scPortrait](https://github.com/MannLabs/scPortrait) to extract cell features that characterize both morphological and intensity-based properties of individual cells:

- Area of the masks in pixels
- Mean intensity of the chosen channel in the regions labelled by each of the masks
- Median intensity of the chosen channel in the regions labelled by each of the masks
- 75% quantile of the chosen channel in the regions labelled by each of the masks  
- 25% quantile of the chosen channel in the regions labelled by each of the masks
- Summed intensity of the chosen channel in the regions labelled by each of the masks
- Summed intensity of the chosen channel in the regions labelled by each of the masks, normalized by the mask area

These features provide a comprehensive profile for later training of machine learning models to identify cell types and states.

In [None]:
import lamindb as ln
import bionty as bt
import pandas as pd

from scportrait.pipeline.featurization import CellFeaturizer

# Start lamindb tracking for this notebook; the matching `ln.finish()` call
# is in the last cell.
ln.track()

We compute these features from the previously generated single-cell image datasets.

In [None]:
# Get single-cell images and config
#
# Single-cell image datasets: artifacts in the "scportrait/examples" instance
# that are flagged `is_latest` and carry both the "autophagy imaging" and the
# "scportrait single-cell images" labels. No terminal call such as `.one()` is
# applied here, so `sc_datasets` can be filtered further below.
sc_datasets = (
    ln.Artifact.using("scportrait/examples")
    .filter(ulabels__name="autophagy imaging", is_latest=True)
    .filter(ulabels__name="scportrait single-cell images")
)
# scPortrait config: the artifact labelled with both "autophagy imaging" and
# "scportrait config"; `.distinct()` removes duplicate rows before `.one()`
# picks the single remaining match.
# NOTE(review): unlike `sc_datasets`, this query has no `.using(...)` and no
# `is_latest` filter, so it presumably runs against the currently connected
# instance and across all versions — confirm that this is intended.
config = (
    ln.Artifact.filter(ulabels__name="autophagy imaging")
    .filter(ulabels__name="scportrait config")
    .distinct()
    .one()
)

In [None]:
# Set up scPortrait's featurizer once; it is reused for every dataset group
# below. It works in the current directory and reads its settings from the
# path returned by `config.cache()`.
featurizer = CellFeaturizer(
    directory=".",
    config=config.cache(),
    project_location=None,
)


def featurize_datasets(artifact_list) -> pd.DataFrame:
    """Featurize single-cell image datasets and tag rows with their source uid.

    Args:
        artifact_list: Iterable of artifacts; each item must provide
            ``.cache()`` (its result is passed to the featurizer as the
            dataset path) and ``.uid``.

    Returns:
        The table returned by ``featurizer.process`` in which the ``label``
        column has been replaced by a ``dataset`` column holding the uid of
        the artifact each row was computed from.
    """
    # Materialize once: paths and uids must come from the same sequence in the
    # same order, and a one-shot iterable must not be exhausted after the
    # first pass.
    datasets = list(artifact_list)

    paths = [dataset.cache() for dataset in datasets]

    # The featurizer is handed positional integer labels; remember which
    # artifact uid each label stands for.
    dataset_lookup = {idx: dataset.uid for idx, dataset in enumerate(datasets)}
    labels = list(dataset_lookup)

    results = featurizer.process(
        dataset_paths=paths, dataset_labels=labels, return_results=True
    )

    # ensure we store the original dataset uid to be able to track
    # featurization results back to their original dataset
    results["dataset"] = results["label"].map(dataset_lookup)
    del results["label"]
    return results


# Select the wildtype (WT) datasets; their features are intended for later
# model training
wt_cells_afs = sc_datasets.filter(ulabels__name="WT")

# The distinct stimulation conditions annotated on the WT datasets are the
# classes that our classifier should be able to tell apart. A set is used so
# that every condition label is looked up only once.
condition_uls = [
    ln.ULabel.using("scportrait/examples").get(name=stim_name)
    for stim_name in {af.features.get_values()["stimulation"] for af in wt_cells_afs}
]

# map condition names to class labels (a condition missing here raises KeyError)
class_lookup = {"untreated": 0, "14h Torin-1": 1}

# Featurize each condition separately and collect the per-condition tables
per_condition_results = []
for condition_ul in condition_uls:
    cells = wt_cells_afs.filter(ulabels=condition_ul)
    results = featurize_datasets(cells)

    # save condition as a class label
    results["class"] = class_lookup[condition_ul.name]
    per_condition_results.append(results)

# Concatenate once instead of re-copying the growing table in every iteration;
# pandas raises a ValueError here if no condition was found at all.
features = pd.concat(per_condition_results)

Ingest the generated features into our instance as a new artifact and annotate it with the cell line, study and genotype:

In [None]:
# Build the artifact for the WT feature table, then persist it
wt_features_artifact = ln.Artifact.from_df(
    features,
    key="featurization_results/WT.parquet",
    description="featurized single-cell images",
)
artifact = wt_features_artifact.save()

# Link the U2OS cell line record to the saved artifact
u2os_cell_line = bt.CellLine.get(name="U2OS")
artifact.cell_lines.add(u2os_cell_line)

# Annotate the artifact with the study and genotype it belongs to
artifact.features.add_values({"study": "autophagy imaging", "genotype": "WT"})

Repeat this process for the EI24 knockout (EI24KO) cells:

In [None]:
# Process KO cells to see if they behave differently
ko_cells_afs = sc_datasets.filter(ulabels__name="EI24KO")

# Look up the distinct stimulation conditions annotated on the KO datasets
# (expected to be the same two conditions as for the WT cells)
condition_uls = [
    ln.ULabel.using("scportrait/examples").get(name=stimulation_name)
    for stimulation_name in {
        af.features.get_values()["stimulation"] for af in ko_cells_afs
    }
]

# Featurize each condition separately and collect the per-condition tables
per_condition_results_ko = []
for condition_ul in condition_uls:
    cells = ko_cells_afs.filter(ulabels=condition_ul)
    results = featurize_datasets(cells)

    # save condition as a class label; `class_lookup` is the mapping defined
    # in the WT cell, so both feature tables share the same class encoding
    results["class"] = class_lookup[condition_ul.name]
    per_condition_results_ko.append(results)

# Concatenate once instead of re-copying the growing table in every iteration;
# pandas raises a ValueError here if no condition was found at all.
features_ko = pd.concat(per_condition_results_ko)

In [None]:
# Save the KO feature table as a parquet artifact
artifact = ln.Artifact.from_df(
    features_ko,
    description="featurized single-cell images",
    key="featurization_results/EI24KO.parquet",
).save()
# Link the U2OS cell line record; `.get(name=...)` is the single-record lookup
# style used for the other registry lookups in this notebook
artifact.cell_lines.add(bt.CellLine.get(name="U2OS"))

# annotate with required metadata
artifact.features.add_values(
    {
        "study": "autophagy imaging",
        "genotype": "EI24KO",
    }
)

In [None]:
# Mark the notebook run started by `ln.track()` in the first cell as finished.
ln.finish()