## Example IMC analysis with Morpheus

#### Step 0: import required packages

In [None]:
import os
import morpheus as mp
from lightning.pytorch import seed_everything

# Set seed for reproducibility
seed_everything(42)

%reload_ext autoreload
%autoreload 2

#### Step 1: create a dataset object

In [None]:
# Input CSV for the dataset -- change to your own directory
data_path = "/groups/mthomson/zwang2/IMC/output/cedarsLiver_sz48_pxl3_nc44/temp/singlecell.csv"

# Channel names passed to the dataset
channel_names = [
    "CD45",
    "Glnsynthetase",
    "CD163",
    "NKG2D",
    "CCR4",
    "PDL1",
    "FAP",
    "CD11c",
    "LAG3",
    "HepPar1",
    "FOXP3",
    "aSMA",
    "CD4",
    "CD105endoglin",
    "CD68",
    "VISTA",
    "CD20",
    "CD8a",
    "TIM3",
    "CXCR4",
    "PD1",
    "iNOS",
    "CD31",
    "CYR61",
    "CDX2",
    "CAIX",
    "CD3",
    "CD44",
    "CD15",
    "CD11b",
    "HLADR",
    "IL10",
    "CXCL12",
    "HLAABC",
    "DNA1",
    "DNA2",
    "GranzymeB",
    "Ki67",
    "HistoneH3",
    "CXCR3",
    "Galectin9",
    "YAP",
    "CD14",
    "CK19",
]

# Build the dataset object used by every later step
dataset = mp.SpatialDataset(input_path=data_path, channel_names=channel_names)

In [5]:
# Patch geometry
pixel_size = 3  # microns per pixel
patch_size = 16  # patch side length, in pixels

# Cell types to record for each patch, and the subset that gets masked
mask_cell_types = ["Tcytotoxic"]
cell_types = ["Tcytotoxic", "Tumor"]

# Generate the masked patches and save them (save=True)
dataset.generate_masked_patch(
    patch_size=patch_size,
    pixel_size=pixel_size,
    cell_types=cell_types,
    cell_to_mask=mask_cell_types,
    save=True,
)

# Preview the first rows of the resulting patch metadata
print(dataset.metadata.head())

Generating patches of size 16x16 pixels
Pixel size: 3x3 microns
Cell types recorded: ['Tcytotoxic', 'Tumor']
Cell types masked: ['Tcytotoxic']
Patches saved to /groups/mthomson/zwang2/IMC/output/cedarsLiver_sz48_pxl3_nc44/temp/patch.h5
   patch_id  ImageNumber  PatientID  PatchIndex_X  PatchIndex_Y  \
0         0            2          2             0             0   
1         1            2          2             0             1   
2         2            2          2             0             2   
3         3            2          2             0             3   
4         4            2          2             0             4   

   Contains_Tcytotoxic  Contains_Tumor  
0                False            True  
1                 True            True  
2                False            True  
3                 True           False  
4                 True           False  


#### Step 2: generate data splits to prepare for model training

Next, we will need to generate train, validation, and test data splits for model training. We want to stratify our splits by the label we want to predict.

In [None]:
# Metadata column used to stratify the splits; the same name is passed to
# mp.train as `label_name` in the training step
label_name = "Contains_Tcytotoxic"
dataset.generate_data_splits(
    stratify_by=label_name,
)

#### Step 3: train PyTorch model

In [7]:
# Build the classifier from the dataset's channel count and image size
model_arch = "unet"
n_channels = dataset.n_channels
img_size = dataset.img_size
model = mp.PatchClassifier(n_channels, img_size, arch=model_arch)

# Options forwarded to mp.train as `trainer_params`.
# NOTE(review): the saved output of this cell reports `max_epochs=50`, so it
# appears to come from a run with a different setting than the 100 epochs
# configured here -- re-run the cell to refresh the output.
trainer_params = dict(
    max_epochs=100,
    accelerator="auto",
    logger=False,
)

# Train on the `label_name` label; the returned model replaces the untrained one
model = mp.train(
    model=model,
    dataset=dataset,
    label_name=label_name,
    trainer_params=trainer_params,
)

Using cache found in /home/zwang2/.cache/torch/hub/mateuszbuda_brain-segmentation-pytorch_master
/central/home/zwang2/.cache/pypoetry/virtualenvs/morpheus-ndDQRg-x-py3.9/lib/python3.9/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /central/home/zwang2/.cache/pypoetry/virtualenvs/mor ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/central/home/zwang2/.cache/pypoetry/virtualenvs/morpheus-ndDQRg-x-py3.9/lib/python3.9/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /groups/mthomson/zwang2/IMC/output/cedarsLiver_sz48_pxl3_nc44/temp/model/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Training model with unet architecture



  | Name      | Type       | Params
-----------------------------------------
0 | predictor | Sequential | 14.7 M
-----------------------------------------
14.7 M    Trainable params
0         Non-trainable params
14.7 M    Total params
58.760    Total estimated model params size (MB)


Epoch 49: 100%|██████████| 270/270 [00:21<00:00, 12.61it/s, val_bce=0.564, val_precisio1n=0.771, val_recall=0.362, val_bmc=0.469, val_auroc=0.670, val_f1=0.484, val_acc=0.876]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 270/270 [00:21<00:00, 12.61it/s, val_bce=0.564, val_precisio1n=0.771, val_recall=0.362, val_bmc=0.469, val_auroc=0.670, val_f1=0.484, val_acc=0.876]


Restoring states from the checkpoint path at /groups/mthomson/zwang2/IMC/output/cedarsLiver_sz48_pxl3_nc44/temp/model/checkpoints/epoch=45-step=12420.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /groups/mthomson/zwang2/IMC/output/cedarsLiver_sz48_pxl3_nc44/temp/model/checkpoints/epoch=45-step=12420.ckpt


model saved to /groups/mthomson/zwang2/IMC/output/cedarsLiver_sz48_pxl3_nc44/temp/model
Testing DataLoader 0: 100%|██████████| 101/101 [00:46<00:00,  2.19it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.8920052647590637
       test_auroc           0.7620055675506592
        test_bce            0.5567107796669006
        test_bmc            0.5696919560432434
         test_f1             0.623245120048523
     test_precisio1n        0.7044439315795898
       test_recall          0.5697081685066223
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


#### Step 4: generate counterfactuals

In [None]:
# Patches to generate counterfactuals for: training-split patches that contain
# tumor cells but no cytotoxic T cells
select_metadata = dataset.metadata[
    (dataset.metadata["Contains_Tumor"] == 1)
    & (dataset.metadata["Contains_Tcytotoxic"] == 0)
    & (dataset.metadata["splits"] == "train")
]

# Channels the optimizer is allowed to perturb
channel_to_perturb = [
    "Glnsynthetase",
    "CCR4",
    "PDL1",
    "LAG3",
    "CD105endoglin",
    "TIM3",
    "CXCR4",
    "PD1",
    "CYR61",
    "CD44",
    "IL10",
    "CXCL12",
    "CXCR3",
    "Galectin9",
    "YAP",
]

# Classification threshold
threshold = 0.5

# Optimization parameters.
# `kappa` is derived from `threshold` (it was previously a hand-set literal with
# a reminder comment), so the two cannot drift apart when the threshold changes.
optimization_param = {
    "use_kdtree": True,
    "theta": 40.0,
    "kappa": (threshold - 0.5) * 2,
    "learning_rate_init": 0.1,
    "beta": 40.0,
    "max_iterations": 10,
    "c_init": 1000.0,
    "c_steps": 5,
}

# Load a model from a saved checkpoint if needed.
# The checkpoint file name depends on the training run, so change it to one
# produced by your own run. The load is skipped when the file is absent so that
# a model trained earlier in this session is not replaced by a failing load.
checkpoint_name = "epoch=49-step=17400.ckpt"
model_path = os.path.join(dataset.model_dir, "checkpoints", checkpoint_name)
if os.path.exists(model_path):
    model = dataset.load_model(model_path, arch="unet")
elif "model" in globals():
    print(f"Checkpoint not found at {model_path}; using the model already in memory")
else:
    raise FileNotFoundError(
        f"Checkpoint not found at {model_path} and no trained model is in memory; "
        "train a model first or set `checkpoint_name` to an existing checkpoint"
    )

In [None]:
# Run counterfactual generation with the trained model on the first 10
# selected patches, targeting class 1
cf = mp.get_counterfactual(
    model=model,
    dataset=dataset,
    images=select_metadata.head(10),
    target_class=1,
    threshold=threshold,
    channel_to_perturb=channel_to_perturb,
    optimization_params=optimization_param,
)