## Example IMC analysis with Morpheus

#### Step 0: import required packages

In [2]:
import os

import morpheus as mp
import pandas as pd
from lightning.pytorch import seed_everything

# Set seed for reproducibility
seed_everything(42)

# Load the autoreload extension and enable mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

Seed set to 42


#### Step 1: create a dataset object

In [8]:
data_path = "/groups/mthomson/zwang2/IMC/output/cedarsLiver_sz48_pxl3_nc44/temp/singlecell.csv"  # change to your own directory

# Every later cell refers to the dataset object as `livertumor`, so bind that
# name here; otherwise a fresh-kernel run fails with a NameError.
# `dataset` is kept as an alias for cells that still use the older name.
livertumor = mp.SpatialDataset(data_path)
dataset = livertumor

In [7]:
# Patch geometry passed to generate_masked_patch.
# NOTE(review): the original call left both arguments empty (a SyntaxError).
# The values below are inferred from the output directory name
# ("..._sz48_pxl3_...") — confirm they match the intended patch and pixel size.
patch_sz = 48
pixel_sz = 3
dataset.generate_masked_patch(patch_sz=patch_sz, pixel_sz=pixel_sz)

['CD45',
 'Glnsynthetase',
 'CD163',
 'NKG2D',
 'CCR4',
 'PDL1',
 'FAP',
 'CD11c',
 'LAG3',
 'HepPar1',
 'FOXP3',
 'aSMA',
 'CD4',
 'CD105endoglin',
 'CD68',
 'VISTA',
 'CD20',
 'CD8a',
 'TIM3',
 'CXCR4',
 'PD1',
 'iNOS',
 'CD31',
 'CYR61',
 'CDX2',
 'CAIX',
 'CD3',
 'CD44',
 'CD15',
 'CD11b',
 'HLADR',
 'IL10',
 'CXCL12',
 'HLAABC',
 'DNA1',
 'DNA2',
 'GranzymeB',
 'Ki67',
 'HistoneH3',
 'CXCR3',
 'Galectin9',
 'YAP',
 'CD14',
 'CK19']

#### Step 2: generate data splits to prepare for model training

Next, we will need to generate train, validation, and test data splits for model training. We want to stratify our splits by the label we want to predict.

In [39]:
# Metadata column used as the prediction target; the same name is reused by
# the training cell further down.
label_name = "Tcytotoxic"
# Create the data splits, stratified by the label column.
livertumor.generate_data_splits(stratify_by=label_name)

Data splits already exist in /groups/mthomson/zwang2/IMC/output/cedarsLiver_sz48_pxl3_nc44/temp/split


#### Step 3: train PyTorch model

In [None]:
# --- build the classifier -------------------------------------------------
# Architecture name plus the input dimensions reported by the dataset object.
model_arch = "unet"
n_channels, img_size = livertumor.n_channels, livertumor.img_size
model = mp.PatchClassifier(n_channels, img_size, arch=model_arch)

# --- fit it ----------------------------------------------------------------
# Settings handed to mp.train via `trainer_params`.
trainer_params = dict(max_epochs=2, accelerator="auto", logger=False)
model = mp.train(
    model=model, dataset=livertumor, label_name=label_name, trainer_params=trainer_params
)

#### Step 4: generate counterfactuals

In [40]:
# Rows of the metadata table to generate counterfactuals for: Tumor == 1,
# Tcytotoxic == 0, and assigned to the "train" split.
select_metadata = livertumor.metadata.loc[
    lambda m: (m["Tumor"] == 1) & (m["Tcytotoxic"] == 0) & (m["splits"] == "train")
]

# Channels the optimisation is allowed to perturb.
channel_to_perturb = [
    "Glnsynthetase", "CCR4", "PDL1", "LAG3", "CD105endoglin",
    "TIM3", "CXCR4", "PD1", "CYR61", "CD44",
    "IL10", "CXCL12", "CXCR3", "Galectin9", "YAP",
]

# threshold for classification
threshold = 0.5

# optimization parameters
optimization_param = {
    "use_kdtree": True,
    "theta": 40.0,
    # Derived from `threshold` rather than hard-coded, so the two cannot drift
    # apart when the threshold is changed (0.5 -> 0.0).
    "kappa": (threshold - 0.5) * 2,
    "learning_rate_init": 0.1,
    "beta": 40.0,
    "max_iterations": 10,
    "c_init": 1000.0,
    "c_steps": 5,
}

# load model if needed
# NOTE(review): the checkpoint file name refers to epoch 49, while the training
# cell in this notebook sets max_epochs=2 — presumably this checkpoint comes
# from a separate, longer run; confirm it exists under `model_dir`.
model_path = os.path.join(livertumor.model_dir, "checkpoints/epoch=49-step=17400.ckpt")
model = livertumor.load_model(model_path, arch="unet")

Using cache found in /home/zwang2/.cache/torch/hub/mateuszbuda_brain-segmentation-pytorch_master


In [42]:
# Generate counterfactuals using trained model
# Only the first 10 selected rows are passed in.
# NOTE(review): the output recorded for this cell ends in
# "TypeError: generate_one_cf() missing 8 required positional arguments",
# preceded by a multiprocessing ConnectionResetError. Presumably this is
# related to the worker dispatch used with num_workers=2 — confirm against the
# installed morpheus version before relying on this cell.
cf = mp.get_counterfactual(
    images=select_metadata.iloc[:10],
    dataset=livertumor,
    target_class=1,
    model=model,
    channel_to_perturb=channel_to_perturb,
    optimization_params=optimization_param,
    threshold=threshold,
    num_workers=2,
)

  0%|          | 0/10 [00:00<?, ?it/s]


ConnectionResetError: [Errno 104] Connection reset by peer

Traceback (most recent call last):
  File "/central/home/zwang2/anaconda3/envs/py39env/lib/python3.9/multiprocessing/resource_sharer.py", line 138, in _serve
    with self._listener.accept() as conn:
  File "/central/home/zwang2/anaconda3/envs/py39env/lib/python3.9/multiprocessing/connection.py", line 465, in accept
    deliver_challenge(c, self._authkey)
  File "/central/home/zwang2/anaconda3/envs/py39env/lib/python3.9/multiprocessing/connection.py", line 738, in deliver_challenge
    connection.send_bytes(CHALLENGE + message)
  File "/central/home/zwang2/anaconda3/envs/py39env/lib/python3.9/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/central/home/zwang2/anaconda3/envs/py39env/lib/python3.9/multiprocessing/connection.py", line 411, in _send_bytes
    self._send(header + buf)
  File "/central/home/zwang2/anaconda3/envs/py39env/lib/python3.9/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, b

TypeError: generate_one_cf() missing 8 required positional arguments: 'original_class', 'target_class', 'model', 'channel', 'channel_to_perturb', 'mu', 'stdev', and 'kdtree_path'

In [None]:
# Export a metadata table: patient/image identifiers joined with the labels
# and the entries of `misc`, written as metadata.csv next to the input CSV.
# NOTE(review): `misc` is not defined in any visible cell of this notebook;
# it is assumed to hold one (x, y) index pair per row — confirm where it comes
# from and define it before this cell.
label = livertumor.label

metadata = (
    livertumor.metadata[["PatientID", "ImageNumber"]]
    # merge metadata and label using ImageNumber as the common column
    .merge(label, on="ImageNumber")
    # expose the row position as an explicit "patch_index" column
    .reset_index()
    .rename(columns={"index": "patch_index"})
    # add misc to metadata with columns "location_x_index" and "location_y_index"
    .join(pd.DataFrame(misc, columns=["location_x_index", "location_y_index"]))
)

# Write into the same directory as `data_path` instead of repeating the
# absolute path, so changing `data_path` relocates the output as well.
metadata.to_csv(
    os.path.join(os.path.dirname(data_path), "metadata.csv"),
    index=False,
)

In [None]:
import h5py

# Write the image array, the metadata table and the channel names into a
# single HDF5 file stored in the same directory as `data_path`.
h5_path = os.path.join(os.path.dirname(data_path), "crc.h5")
intensity = livertumor.intensity

with h5py.File(h5_path, "w") as f:
    # Dataset holding the images, gzip-compressed. Each chunk spans up to 100
    # entries along the first axis and the full extent of the remaining axes.
    # The chunk shape is derived from the data because h5py rejects a chunk
    # that is larger than the dataset along any axis (the previous hard-coded
    # (100, 16, 16, 44) only worked for >= 100 patches of exactly that shape).
    f.create_dataset(
        "images",
        data=intensity,
        compression="gzip",
        chunks=(min(100, intensity.shape[0]), *intensity.shape[1:]),
        dtype=intensity.dtype,
    )

    # Dataset holding the metadata table as a NumPy record array.
    # NOTE(review): h5py has no native mapping for plain object-dtype fields;
    # if the metadata contains string columns (e.g. "splits") this call may
    # raise — confirm and convert those columns if needed.
    metadata_numpy = livertumor.metadata.to_records(index=False)
    f.create_dataset("metadata", data=metadata_numpy, dtype=metadata_numpy.dtype)

    # Dataset holding the channel names.
    f.create_dataset("channel_names", data=livertumor.channel)