In [35]:
import cucim
import sys
sys.path.insert(0,'../..')
import os
import pandas as pd
import numpy as np
from histopathology.datasets.base_dataset import SlidesDataset
import preprocessing.tiling as tiling

## Setup and utils

In [16]:
# Root directory under which the PANDA dataset files are expected.
root_path = "/tmp/datasets"
# CSV file with the per-slide rows of the training set.
train_csv = os.path.join(root_path, "train.csv")
# A single example slide, handy for quick experiments in this notebook.
input_file = os.path.join(root_path, "train_images/0005f7aaab2800f6170c399693a96917.tiff")

In [17]:
from matplotlib import pyplot as plt
def visualize(image, dpi=80.0):
    """Show an image array in a figure whose canvas is ``width`` x ``height`` pixels.

    Args:
        image: Array-like object exposing ``.shape`` whose first two dimensions
            are (height, width). A trailing channel dimension is optional, so
            both 2-D grayscale and 3-D colour arrays are accepted.
        dpi: Dots per inch. Used to convert the pixel dimensions to the figure
            size in inches and also set as the figure's resolution, so the two
            stay consistent.

    Returns:
        None. The figure is created on the current pyplot state.
    """
    # Only the first two dimensions drive the figure size; slicing (rather than
    # a three-way unpack) keeps 2-D grayscale images from raising ValueError.
    height, width = image.shape[:2]
    # figsize is expressed in inches, so the figure must be created with the
    # same dpi; otherwise the pixel size follows rcParams["figure.dpi"] instead.
    plt.figure(figsize=(width / dpi, height / dpi), dpi=dpi)
    plt.axis('off')
    plt.imshow(image)

In [20]:
from pathlib import Path
from typing import Union, Optional
class PandaDataset(SlidesDataset):
    """Slide dataset for the PANDA (Prostate cANcer graDe Assessment) challenge data.

    Each item is a dictionary following the `SlideKey` schema, extended with the meta-data
    columns of the original dataset (`'data_provider'`, `'isup_grade'`, and `'gleason_score'`).

    Ref.: https://www.kaggle.com/c/prostate-cancer-grade-assessment/overview
    """
    # Names of the dataframe columns holding slide ID, image path, mask path and label.
    SLIDE_ID_COLUMN = 'image_id'
    IMAGE_COLUMN = 'image'
    MASK_COLUMN = 'mask'
    LABEL_COLUMN = 'isup_grade'

    # Extra columns carried along as meta-data.
    METADATA_COLUMNS = ('data_provider', 'isup_grade', 'gleason_score')

    # CSV file name declared as this dataset's default.
    DEFAULT_CSV_FILENAME = "train.csv"

    def __init__(self,
                 root: Union[str, Path],
                 dataset_csv: Optional[Union[str, Path]] = None,
                 dataset_df: Optional[pd.DataFrame] = None) -> None:
        """Initialise the base dataset, then add the image and mask path columns.

        :param root: Root directory of the dataset, forwarded to the base class.
        :param dataset_csv: Optional path to the dataset CSV, forwarded to the base class.
        :param dataset_df: Optional pre-loaded dataframe, forwarded to the base class.
        """
        # Column validation is postponed: the image/mask columns do not exist yet.
        super().__init__(root, dataset_csv, dataset_df, validate_columns=False)

        # The PANDA CSV has no file paths, so they are built from the dataframe index
        # (presumably the slide IDs - confirm against the base class).
        ids = self.dataset_df.index
        path_parts = (
            (self.IMAGE_COLUMN, "train_images/", ".tiff"),
            (self.MASK_COLUMN, "train_label_masks/", "_mask.tiff"),
        )
        for column, prefix, suffix in path_parts:
            self.dataset_df[column] = prefix + ids + suffix

        # All expected columns are now present, so validation can run.
        self.validate_columns()

## Load Slide Dataset

In [30]:
# Build the dataset from root_path only (no explicit CSV or dataframe is passed).
dataset = PandaDataset(root=root_path)
# Fetch the first sample with subscript syntax rather than calling the
# __getitem__ dunder method directly.
slide = dataset[0]
# Display the keys available on a single sample.
slide.keys()

dict_keys([<SlideKey.SLIDE_ID: 'slide_id'>, <SlideKey.IMAGE: 'image'>, <SlideKey.IMAGE_PATH: 'image_path'>, <SlideKey.MASK: 'mask'>, <SlideKey.MASK_PATH: 'mask_path'>, <SlideKey.LABEL: 'label'>, <SlideKey.METADATA: 'metadata'>])

In [36]:
# Display the entry stored under the "image" key of the sample; in the recorded
# output this is the path of the slide's TIFF file, not decoded pixel data.
slide["image"]

'/tmp/datasets/train_images/0005f7aaab2800f6170c399693a96917.tiff'

In [None]:
# NOTE(review): bare reference to the imported `tiling` module in a cell that was
# never executed - looks like an unfinished placeholder; complete it or remove the cell.
tiling