Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

fixing PandaInnereyeSSLMIL #625

Merged
merged 12 commits into from
Jan 11, 2022
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ gets uploaded to AzureML, by skipping all test folders.
- ([#606](https://github.com/microsoft/InnerEye-DeepLearning/pull/606)) Bug fix: registered models do not include the hi-ml submodule
- ([#593](https://github.com/microsoft/InnerEye-DeepLearning/pull/593)) Bug fix for hi-ml 0.1.11 issue (#130): empty mount point is turned into ".", which fails the AML job
- ([#587](https://github.com/microsoft/InnerEye-DeepLearning/pull/587)) Bug fix for regression in AzureML's handling of environments: upgrade to hi-ml 0.1.11
- ([#625](https://github.com/microsoft/InnerEye-DeepLearning/pull/625)) Updates to PandaDeepMIL to enable the use of an SSL pre-trained checkpoint, and update of the hi-ml commit
- ([#537](https://github.com/microsoft/InnerEye-DeepLearning/pull/537)) Print warning if inference is disabled but comparison requested.
- ([#567](https://github.com/microsoft/InnerEye-DeepLearning/pull/567)) fix pillow version.
- ([#546](https://github.com/microsoft/InnerEye-DeepLearning/pull/546)) Environment and hello_world_model documentation updated
Expand Down
13 changes: 13 additions & 0 deletions InnerEye/Common/fixed_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,19 @@ def repository_root_directory(path: Optional[PathOrString] = None) -> Path:
return root


def repository_parent_directory(path: Optional[PathOrString] = None) -> Path:
    """
    Gets the full path to the directory that holds the present repository, i.e. the parent of the
    repository root directory.
    :param path: if provided (and non-empty), a relative path to append to the absolute path of the
    repository's parent directory.
    :return: The full path to the repository's parent directory, extended by `path` if one was given.
    """
    parent = repository_root_directory().parent
    # An empty or missing `path` yields the parent directory itself.
    return parent / path if path else parent


INNEREYE_PACKAGE_NAME = "InnerEye"
# Child paths to include in a registered model that live outside InnerEye/.
ENVIRONMENT_YAML_FILE_NAME = "environment.yml"
Expand Down
25 changes: 15 additions & 10 deletions InnerEye/ML/Histopathology/models/deepmil.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,11 @@ def get_classifier(self) -> Callable:

def get_loss(self) -> Callable:
if self.n_classes > 1:
return nn.CrossEntropyLoss(weight=self.class_weights)
if self.class_weights is None:
return nn.CrossEntropyLoss()
else:
class_weights = self.class_weights.float()
return nn.CrossEntropyLoss(weight=class_weights)
else:
pos_weight = None
if self.class_weights is not None:
Expand Down Expand Up @@ -263,9 +267,11 @@ def test_epoch_end(self, outputs: List[Dict[str, Any]]) -> None: # type: ignore
list_slide_dicts.append(slide_dict)
list_encoded_features.append(results[ResultsKey.IMAGE][slide_idx])

print(f"Metrics results will be output to {fixed_paths.repository_root_directory()}/outputs")
csv_filename = fixed_paths.repository_root_directory() / Path('outputs/test_output.csv')
encoded_features_filename = fixed_paths.repository_root_directory() / Path('outputs/test_encoded_features.pickle')
outputs_path = fixed_paths.repository_parent_directory() / 'outputs'
print(f"Metrics results will be output to {outputs_path}")
outputs_fig_path = outputs_path / 'fig'
csv_filename = outputs_path / 'test_output.csv'
encoded_features_filename = outputs_path / 'test_encoded_features.pickle'

# Collect the list of dictionaries in a list of pandas dataframe and save
df_list = []
Expand All @@ -288,24 +294,23 @@ def test_epoch_end(self, outputs: List[Dict[str, Any]]) -> None: # type: ignore

for key in report_cases.keys():
print(f"Plotting {key} ...")
output_path = Path(fixed_paths.repository_root_directory(), f'outputs/fig/{key}/')
Path(output_path).mkdir(parents=True, exist_ok=True)
key_folder_path = outputs_fig_path / f'{key}'
Path(key_folder_path).mkdir(parents=True, exist_ok=True)
nslides = len(report_cases[key][0])
for i in range(nslides):
slide, score, paths, top_attn = report_cases[key][0][i]
fig = plot_slide_noxy(slide, score, paths, top_attn, key + '_top', ncols=4)
figpath = Path(output_path, f'{slide}_top.png')
figpath = Path(key_folder_path, f'{slide}_top.png')
fig.savefig(figpath, bbox_inches='tight')

slide, score, paths, bottom_attn = report_cases[key][1][i]
fig = plot_slide_noxy(slide, score, paths, bottom_attn, key + '_bottom', ncols=4)
figpath = Path(output_path, f'{slide}_bottom.png')
figpath = Path(key_folder_path, f'{slide}_bottom.png')
fig.savefig(figpath, bbox_inches='tight')

print("Plotting histogram ...")
fig = plot_scores_hist(results)
output_path = Path(fixed_paths.repository_root_directory(), 'outputs/fig/hist_scores.png')
fig.savefig(output_path, bbox_inches='tight')
fig.savefig(outputs_fig_path / 'hist_scores.png', bbox_inches='tight')

@staticmethod
def normalize_dict_for_df(dict_old: Dict[str, Any], use_gpu: bool) -> Dict:
Expand Down
12 changes: 1 addition & 11 deletions InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,14 @@
It is responsible for instantiating the encoder and full DeepMIL model. Subclasses should define
their datamodules and configure experiment-specific parameters.
"""
import os
from pathlib import Path
from typing import Type

import param
from torch import nn
from torchvision.models.resnet import resnet18

from health_azure.utils import CheckpointDownloader, get_workspace
from health_ml.networks.layers.attention_layers import AttentionLayer, GatedAttentionLayer
from InnerEye.Common import fixed_paths
from InnerEye.ML.lightning_container import LightningContainer
from InnerEye.ML.Histopathology.datamodules.base_module import CacheMode, TilesDataModule
from InnerEye.ML.Histopathology.models.deepmil import DeepMILModule
Expand Down Expand Up @@ -56,14 +53,7 @@ def cache_dir(self) -> Path:

def setup(self) -> None:
if self.encoder_type == InnerEyeSSLEncoder.__name__:
self.downloader = CheckpointDownloader(
aml_workspace=get_workspace(),
run_id="updated_transforms:updated_transforms_1636471522_5473e3ff",
checkpoint_filename="best_checkpoint.ckpt",
download_dir='outputs/'
)
os.chdir(fixed_paths.repository_root_directory())
self.downloader.download_checkpoint_if_necessary()
raise NotImplementedError("InnerEyeSSLEncoder requires a pre-trained checkpoint.")

self.encoder = self.get_encoder()
self.encoder.cuda()
Expand Down
20 changes: 20 additions & 0 deletions InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@
"""
from pathlib import Path
from typing import Any, Dict
import os

from monai.transforms import Compose
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint

from health_ml.networks.layers.attention_layers import GatedAttentionLayer
from health_azure.utils import get_workspace
from health_azure.utils import CheckpointDownloader
from InnerEye.Common import fixed_paths
from InnerEye.ML.configs.histo_configs.classification.BaseMIL import BaseMIL
from InnerEye.ML.Histopathology.datamodules.base_module import TilesDataModule
Expand Down Expand Up @@ -87,6 +90,23 @@ def cache_dir(self) -> Path:
f"/tmp/innereye_cache1/{self.__class__.__name__}-{self.encoder_type}/"
)

def setup(self) -> None:
    """
    Prepares the tile encoder for this container.

    If the encoder type is `InnerEyeSSLEncoder`, a checkpoint downloader for the SSL run
    `innereye_ssl_checkpoint_crck_4ws` is created and the checkpoint is downloaded if necessary.
    In all cases the encoder is then created via `get_encoder`, moved to the GPU and put into
    evaluation mode.
    """
    if self.encoder_type == InnerEyeSSLEncoder.__name__:
        from InnerEye.ML.configs.histo_configs.run_ids import innereye_ssl_checkpoint_crck_4ws
        self.downloader = CheckpointDownloader(
            # The result of get_workspace() is passed as `aml_workspace`, matching the
            # DeepSMILEPanda container (it is not a path to a config json file).
            aml_workspace=get_workspace(),
            run_id=innereye_ssl_checkpoint_crck_4ws,
            checkpoint_filename="best_checkpoint.ckpt",
            download_dir="outputs/",
            remote_checkpoint_dir=Path("outputs/checkpoints")
        )
        # NOTE(review): `download_dir` is relative, so the working directory is presumably changed
        # here to make the download land in <repository parent>/outputs - confirm.
        os.chdir(fixed_paths.repository_parent_directory())
        self.downloader.download_checkpoint_if_necessary()

    self.encoder = self.get_encoder()
    self.encoder.cuda()
    self.encoder.eval()
dccastro marked this conversation as resolved.
Show resolved Hide resolved

def get_data_module(self) -> TilesDataModule:
image_key = TcgaCrck_TilesDataset.IMAGE_COLUMN
transform = Compose(
Expand Down
35 changes: 25 additions & 10 deletions InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from InnerEye.Common import fixed_paths
from InnerEye.ML.Histopathology.datamodules.panda_module import PandaTilesDataModule
from InnerEye.ML.Histopathology.datasets.panda_tiles_dataset import PandaTilesDataset
from InnerEye.ML.common import get_best_checkpoint_path

from InnerEye.ML.Histopathology.models.transforms import (
EncodeTilesBatchd,
Expand All @@ -27,7 +28,6 @@
InnerEyeSSLEncoder,
)
from InnerEye.ML.configs.histo_configs.classification.BaseMIL import BaseMIL
from InnerEye.ML.configs.histo_configs.run_ids import innereye_ssl_checkpoint


class DeepSMILEPanda(BaseMIL):
Expand All @@ -43,6 +43,7 @@ def __init__(self, **kwargs: Any) -> None:
num_epochs=200,
recovery_checkpoint_save_interval=10,
recovery_checkpoints_save_last_k=-1,
# use_mixed_precision = True,
# declared in WorkflowParams:
number_of_cross_validation_splits=5,
cross_validation_split_index=0,
Expand Down Expand Up @@ -76,13 +77,15 @@ def cache_dir(self) -> Path:

def setup(self) -> None:
if self.encoder_type == InnerEyeSSLEncoder.__name__:
from InnerEye.ML.configs.histo_configs.run_ids import innereye_ssl_checkpoint_binary
self.downloader = CheckpointDownloader(
azure_config_json_path=get_workspace(),
run_recovery_id=innereye_ssl_checkpoint,
checkpoint_filename="last.ckpt",
aml_workspace=get_workspace(),
run_id=innereye_ssl_checkpoint_binary, # innereye_ssl_checkpoint
checkpoint_filename="best_checkpoint.ckpt", # "last.ckpt",
download_dir="outputs/",
remote_checkpoint_dir=Path("outputs/checkpoints")
)
os.chdir(fixed_paths.repository_root_directory())
os.chdir(fixed_paths.repository_parent_directory())
self.downloader.download_checkpoint_if_necessary()
self.encoder = self.get_encoder()
self.encoder.cuda()
Expand Down Expand Up @@ -118,11 +121,23 @@ def get_path_to_best_checkpoint(self) -> Path:
was applied there.
"""
# absolute path is required for registering the model.
return (
fixed_paths.repository_root_directory()
/ self.checkpoint_folder_path
/ self.best_checkpoint_filename_with_suffix
)
absolute_checkpoint_path = Path(fixed_paths.repository_root_directory(),
self.checkpoint_folder_path,
self.best_checkpoint_filename_with_suffix)
if absolute_checkpoint_path.is_file():
return absolute_checkpoint_path

absolute_checkpoint_path_parent = Path(fixed_paths.repository_parent_directory(),
self.checkpoint_folder_path,
self.best_checkpoint_filename_with_suffix)
if absolute_checkpoint_path_parent.is_file():
return absolute_checkpoint_path_parent

checkpoint_path = get_best_checkpoint_path(Path(self.checkpoint_folder_path))
if checkpoint_path.is_file():
return checkpoint_path

raise ValueError("Path to best checkpoint not found")


class PandaImageNetMIL(DeepSMILEPanda):
Expand Down
6 changes: 6 additions & 0 deletions InnerEye/ML/configs/histo_configs/run_ids.py
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
innereye_ssl_checkpoint = "hsharma_panda_explore:hsharma_panda_explore_1638437076_357167ae"
innereye_ssl_checkpoint_binary = "hsharma_panda_tiles_ssl:hsharma_panda_tiles_ssl_1639766433_161e03b9"
vale-salvatelli marked this conversation as resolved.
Show resolved Hide resolved
innereye_ssl_checkpoint_crck_4ws = "ModifyOldSSLCheckpoint:a9259fdb-3964-4c5b-8962-4660e0b79d44"
innereye_ssl_checkpoint_crck_radiomics = "ModifyOldSSLCheckpoint:704b1af8-7c75-46ed-8460-d80a0e603194"

# outdated checkpoints
# innereye_ssl_checkpoint_crck_radiomics = updated_transforms:updated_transforms_1636471522_5473e3ff
1 change: 1 addition & 0 deletions InnerEye/ML/deep_learning_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,7 @@ def logs_folder(self) -> Path:
@property
def checkpoint_folder(self) -> Path:
    """
    Gets the full path of the folder in which model checkpoints should be stored during training:
    the checkpoint sub-folder (CHECKPOINT_FOLDER) of the outputs folder. The path is also printed
    to stdout on every access.
    """
    checkpoint_dir = self.outputs_folder / CHECKPOINT_FOLDER
    print(f"Expected Checkpoint path {checkpoint_dir}")
    return checkpoint_dir

@property
Expand Down