# Run HistoQC on whole slide images

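HistoQC writes several outputs for each slide. The one used later in this notebook is the tissue mask (`<slide>_mask_use.png`), which is passed to the tiling step. Thanks, HistoQC.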

In [None]:
%%bash
# Run HistoQC inside Docker on the slides matching data/wsi/*.svs and write
# its results to data/histoqc-outputs/ (./data is mounted at /data).
#
# * /etc/passwd and /etc/group are mounted read-only and --user is set to the
#   caller's uid:gid, so the container process runs as the invoking user
#   (presumably so files written to ./data are not owned by root).
# * The command substitutions are double-quoted so that a working directory
#   containing spaces does not split the -v argument into several words.
# * The slide glob is single-quoted on purpose: the host shell must not expand
#   it, it is handed to histoqc verbatim (presumably expanded by HistoQC
#   inside the container).
docker run --rm \
    -v /etc/passwd:/etc/passwd:ro \
    -v /etc/group:/etc/group:ro \
    --user "$(id -u):$(id -g)" \
    -v "$(pwd)/data:/data" \
    -w /data \
    histoqc \
        python -m histoqc --outdir /data/histoqc-outputs/ '/data/wsi/*.svs'

# Tile the tissue in whole slide images

Install `tyler`:

In [None]:
# Installs tyler from a tarball of the repository's `main` branch, so the
# version is not pinned: re-running this cell later may install different code.
# `%pip` (rather than `!pip`) installs into the running kernel's environment.
%pip install --no-cache-dir https://github.com/kaczmarj/tyler/tarball/main

Tile images:

In [None]:
%%bash
# Tile every slide in data/wsi/ with tyler, passing the HistoQC "mask_use"
# image for that slide as the mask argument. Tiles go under data/tiles/.
# A slide is skipped when its tile directory already has content, so the cell
# can be re-run without redoing finished slides.
for wsi in data/wsi/*.svs; do
  # When nothing matches, bash leaves the pattern unexpanded; skip that
  # literal string instead of handing it to tyler.
  [ -e "$wsi" ] || continue
  wsi_name="$(basename "$wsi")"
  mask="data/histoqc-outputs/${wsi_name}/${wsi_name}_mask_use.png"
  output="data/tiles/"
  # On the first run the per-slide directory does not exist yet; silence the
  # resulting ls error, an empty result simply means "nothing to skip".
  if [ -n "$(ls -A "$output/$wsi_name" 2>/dev/null)" ]; then
    echo "Skipping because output directory contains outputs: $output/$wsi_name"
    continue
  fi
  # Without a mask from the HistoQC step there is nothing to tile against.
  if [ ! -f "$mask" ]; then
    echo "Skipping because mask not found: $mask" >&2
    continue
  fi
  python -m tyler --output "$output" --tile-size 1000 1000 --strides 1000 1000 --force "$wsi" "$mask"
done

In [None]:
# Quick check on the tiling output: count the directory entries (ls piped
# through wc -l) for two specific slides. The paths are hard-coded to slides
# that are expected to exist under data/tiles/.
!ls data/tiles/TCGA-3L-AA1B-01A-01-TS1.9C415218-D5B4-4945-B243-F42A4C8C0484.svs/ | wc -l
!ls data/tiles/TCGA-3L-AA1B-01Z-00-DX2.17CE3683-F4B1-4978-A281-8F620C4D77B4.svs/ | wc -l

# Visualize potential augmentations

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import torch
import torchvision.transforms as T

# Seed PyTorch's global random number generator. Intended to make the random
# augmentations shown below repeatable across runs (assumes the torchvision
# transforms draw from this generator - TODO confirm).
torch.manual_seed(42)

In [None]:
def plot(imgs, with_orig=True, row_title=None, *, orig=None, figsize=(15, 8), **imshow_kwargs):
    """Show images in a grid, one subplot per image, with all ticks hidden.

    Parameters
    ----------
    imgs : list or list of lists
        Images that ``np.asarray`` can convert. A flat list is drawn as a
        single row; a list of lists is drawn as one grid row per inner list.
        The number of columns is taken from the first row.
    with_orig : bool, default True
        When true, the reference image is prepended to every row and the
        top-left subplot is titled 'Original image'.
    row_title : sequence of str, optional
        One label per row, written as the y-label of the row's first subplot.
    orig : image, optional
        Reference image used when ``with_orig`` is true. When omitted, the
        notebook-level variable ``orig_img`` is used, as before.
    figsize : tuple, default (15, 8)
        Figure size passed to ``plt.subplots``.
    **imshow_kwargs
        Forwarded unchanged to ``Axes.imshow``.

    Raises
    ------
    NameError
        If ``with_orig`` is true, ``orig`` is not given and no notebook-level
        ``orig_img`` has been defined yet.
    """
    if not isinstance(imgs[0], list):
        # Make a 2d grid even if there's just 1 row
        imgs = [imgs]

    if with_orig and orig is None:
        # Backward-compatible default: fall back to the notebook global, and
        # fail here (before a figure is created) if it does not exist yet.
        orig = orig_img

    num_rows = len(imgs)
    num_cols = len(imgs[0]) + with_orig
    _, axs = plt.subplots(nrows=num_rows, ncols=num_cols, squeeze=False, figsize=figsize)
    for row_idx, row in enumerate(imgs):
        row = [orig] + row if with_orig else row
        for col_idx, img in enumerate(row):
            ax = axs[row_idx, col_idx]
            ax.imshow(np.asarray(img), **imshow_kwargs)
            ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])

    if with_orig:
        axs[0, 0].set(title='Original image')
        axs[0, 0].title.set_size(8)
    if row_title is not None:
        for row_idx in range(num_rows):
            axs[row_idx, 0].set(ylabel=row_title[row_idx])

    plt.tight_layout()

In [None]:
# Reference tile used by every augmentation demo below: one tile produced by
# the tiling step, converted to RGB and resized to 256x256 for display.
# The file is opened in a `with` block so the handle is closed once the
# converted copy exists, and the result is built from the opened tile itself
# (the previous version read an undefined name `img`, which fails on a fresh
# kernel).
with Image.open("data/tiles/TCGA-3L-AA1B-01A-01-TS1.9C415218-D5B4-4945-B243-F42A4C8C0484.svs/100000_10000_1000_1000_0.2525_0.2525.png") as tile:
    orig_img = tile.convert("RGB").resize((256, 256))

## ColorJitter

In [None]:
# ColorJitter demo: apply the transform to the reference tile 15 separate
# times and show the results as a 3-row by 5-column grid.
tform = T.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.1, hue=0.2)
jitter_samples = [[tform(orig_img) for _col in range(5)] for _row in range(3)]
plot(jitter_samples)

## GaussianBlur

In [None]:
# GaussianBlur demo: apply the transform to the reference tile 15 separate
# times and show the results as a 3-row by 5-column grid.
tform = T.GaussianBlur(kernel_size=(3, 9), sigma=(0.1, 2))
blur_samples = [[tform(orig_img) for _col in range(5)] for _row in range(3)]
plot(blur_samples)

## RandomAffine

In [None]:
# RandomAffine demo: apply the transform to the reference tile 15 separate
# times and show the results as a 3-row by 5-column grid.
tform = T.RandomAffine(
    degrees=180,
    scale=(0.75, 1.25),
    shear=15,
    interpolation=T.InterpolationMode.BILINEAR,
)
affine_samples = [[tform(orig_img) for _col in range(5)] for _row in range(3)]
plot(affine_samples)

## RandomEqualize

In [None]:
# RandomEqualize demo (p=0.25): apply the transform to the reference tile 15
# separate times and show the results as a 3-row by 5-column grid.
tform = T.RandomEqualize(p=0.25)
equalize_samples = [[tform(orig_img) for _col in range(5)] for _row in range(3)]
plot(equalize_samples)

## AutoAugment

In [None]:
# AutoAugment demo: one grid row per policy, eight augmented copies of the
# reference tile in each row, labelled with the policy's member name.
policies = [
    T.AutoAugmentPolicy.CIFAR10,
    T.AutoAugmentPolicy.IMAGENET,
    T.AutoAugmentPolicy.SVHN,
]
augmenters = list(map(T.AutoAugment, policies))
imgs = []
for augmenter in augmenters:
    imgs.append([augmenter(orig_img) for _sample in range(8)])
# Enum member name, e.g. "CIFAR10" - the same text as the last dotted
# component of str(policy).
row_title = [policy.name for policy in policies]
plot(imgs, row_title=row_title)

# Implement data pipeline

In [None]:
from vision import HistologyPatchDataset

In [None]:
# Build the dataset from data/tiles/, the directory the tiling step above
# writes to. The bare `hd` on the last line shows the object's repr as the
# cell output.
hd = HistologyPatchDataset("data/tiles/")
hd

In [None]:
# Size of the dataset as reported by its __len__ (presumably one item per
# tile image - confirm against the HistologyPatchDataset implementation).
len(hd)