In [3]:
from matplotlib import pyplot as plt
import numpy as np
from pathlib import Path
import openslide
import tifffile
import PIL.Image
import h5py
import torch

import sys
sys.path.append("/workspaces/TRIDENT")

from extract_patches import random_sample, entropy_bin_sampling, entropy_top_sampling


AttributeError: partially initialized module 'torchvision' has no attribute 'extension' (most likely due to a circular import)

In [None]:
def extract_root_image_size(tif_path: Path) -> tuple[int, int]:
    """Read the dimensions stored on the first page of a TIFF file.

    Args:
        tif_path: Path of the TIFF-based file to inspect.

    Returns:
        The values of the ``ImageWidth`` and ``ImageLength`` tags of page 0,
        in that order.
    """
    with tifffile.TiffFile(tif_path) as handle:
        root_tags = handle.pages[0].tags
        width = root_tags["ImageWidth"].value
        length = root_tags["ImageLength"].value
    return (width, length)


In [None]:
# Slide used as the running example in the cells below.
tcga_example = Path("/mnt/nfs03-R6/staining/TCGA_256_40/TCGA-DB-A4XB-01Z-00-DX1.FBF60AEC-EF2E-4771-A46C-7ABF60C99D9C.svs")
# Directory holding the per-slide patch files.
h5_dir = Path("/mnt/nfs03-R6/staining/trident/40x_512px_0px_overlap/patches/")
# The patch file shares the slide's file name, with ".svs" swapped for "_patches.h5".
h5_example = h5_dir.joinpath(tcga_example.name.replace(".svs", "_patches.h5"))


In [None]:
# Dump the TIFF tags of every page into a text file.
def write_tiff_tags_to_txt(tiff_path, output_txt_path):
    """Write a human-readable listing of all TIFF tags to a text file.

    Each page gets a ``Page <index>:`` header followed by one line per tag
    and two blank lines. Long or binary tags are abbreviated:

    * ``TileByteCounts`` / ``TileOffsets``: the total number of values and
      at most the first 10 of them.
    * ``InterColorProfile`` / ``JPEGTables``: the first 10 bytes only.

    Args:
        tiff_path: TIFF file to read.
        output_txt_path: Text file to create (overwritten if it exists).
    """
    per_tile_tags = ("TileByteCounts", "TileOffsets")
    binary_tags = ("InterColorProfile", "JPEGTables")
    preview = 10

    with tifffile.TiffFile(tiff_path) as tif, open(output_txt_path, 'w') as report:
        for page in tif.pages:
            page_tags = page.tags
            report.write(f"Page {page.index}:\n")
            for tag in page_tags.values():
                if tag.name in per_tile_tags:
                    # One entry per tile: report the count and a short preview.
                    values = list(tag.value)
                    line = f"{tag.name} ({len(values)} values): {values[:preview]}...\n"
                elif tag.name in binary_tags:
                    # Raw binary payload: only its leading bytes are shown.
                    line = f"{tag.name} (first 10 bytes): {tag.value[:preview]}...\n"
                else:
                    line = f"{tag.name}: {tag.value}\n"
                report.write(line)
            report.write("\n\n")

# Example: dump the tags of the example slide into the TRIDENT workspace,
# using the slide's file name with a .txt suffix.
tags_txt_path = Path("/workspaces/TRIDENT") / tcga_example.with_suffix('.txt').name
write_tiff_tags_to_txt(tcga_example, tags_txt_path)


In [None]:
def get_thumbnail(svs_path: Path, patch_size: int = 512):
    """
    Get the thumbnail of a slide.

    The thumbnail is requested with a square bounding box whose edge is the
    longer slide dimension divided by ``patch_size``, i.e. roughly one
    thumbnail pixel per patch along that dimension.

    Args:
        svs_path: Path of the slide to open.
        patch_size: Edge length, in level-0 pixels, of the patches that one
            thumbnail pixel should stand for. Defaults to 512.

    Returns:
        The thumbnail image produced by ``slide.get_thumbnail``.
    """
    # Context manager so the slide handle is released even if reading fails.
    with openslide.open_slide(str(svs_path)) as slide:
        slide_width, slide_length = extract_root_image_size(svs_path)
        # Image sizes are pixel counts: truncate to an int and never request
        # a zero-sized thumbnail for slides smaller than one patch.
        edge_size = max(1, int(max(slide_width, slide_length) / patch_size))
        return slide.get_thumbnail((edge_size, edge_size))

def visualize_coords(svs_path, coords):
    """
    Mark slide coordinates as green pixels on a copy of the slide thumbnail.

    Args:
        svs_path: Path of the slide.
        coords: Iterable of (x, y) pairs in the coordinate system whose extent
            is given by ``extract_root_image_size``.

    Returns:
        A new RGB image: the thumbnail with one green pixel per coordinate.
    """
    thumbnail = get_thumbnail(svs_path)
    slide_width, slide_length = extract_root_image_size(svs_path)
    thumb_w, thumb_h = thumbnail.size

    # Draw on a fresh RGB canvas so the thumbnail itself stays untouched.
    canvas = PIL.Image.new("RGB", thumbnail.size)
    canvas.paste(thumbnail, (0, 0))

    marker = (0, 255, 0)
    for x, y in coords:
        # Rescale from slide coordinates to thumbnail pixels.
        px = int(x * thumb_w / slide_width)
        py = int(y * thumb_h / slide_length)
        canvas.putpixel((px, py), marker)
    return canvas

def visualize_patches(svs_path, coords, patch_size: int = 512, tile_size: int = 32):
    """
    Build a montage of the patches found at the given coordinates.

    Every patch is read at level 0, shrunk to ``tile_size`` x ``tile_size``
    pixels and pasted row by row into a roughly square grid.

    Args:
        svs_path: Path of the slide to read from.
        coords: Iterable of (x, y) level-0 locations passed to ``read_region``.
        patch_size: Edge length of the region read per coordinate (default 512).
        tile_size: Edge length of each patch inside the montage (default 32).

    Returns:
        An RGB image containing all patches; unused grid cells stay black.

    Raises:
        ValueError: If ``coords`` is empty.
    """
    # Plain Python ints: NumPy/torch scalars can be rejected by the ctypes
    # layer underneath read_region.
    origins = [(int(x), int(y)) for x, y in coords]
    if not origins:
        # The grid maths below divides by the column count, which would be 0.
        raise ValueError("coords must contain at least one (x, y) pair")

    # Context manager so the slide handle is released once all patches are read.
    with openslide.open_slide(str(svs_path)) as slide:
        patches = [
            slide.read_region(origin, 0, (patch_size, patch_size)).resize((tile_size, tile_size))
            for origin in origins
        ]

    # Roughly square layout: floor(sqrt(n)) columns, as many rows as needed.
    num_cols = int(np.sqrt(len(patches)))
    num_rows = int(np.ceil(len(patches) / num_cols))
    montage = PIL.Image.new("RGB", (num_cols * tile_size, num_rows * tile_size))
    for i, patch in enumerate(patches):
        row, col = divmod(i, num_cols)
        montage.paste(patch, (col * tile_size, row * tile_size))
    return montage

    
# Sample 1000 coordinates with each strategy and render, per strategy,
# the marked thumbnail and a montage of the sampled patches.
entropy_coords = entropy_bin_sampling(
    h5_path=h5_example, num_bins=10, num_samples=1000, ignore_k_bins=2
)
print(f"Entropy coords: {entropy_coords.shape}")
entropy_thumbnail = visualize_coords(tcga_example, entropy_coords)
entropy_patches = visualize_patches(tcga_example, entropy_coords)

entropy_binned_top_coords = entropy_top_sampling(
    h5_path=h5_example, num_bins=10, num_samples=1000, ignore_k_bins=2
)
print(f"Entropy top coords: {entropy_binned_top_coords.shape}")
entropy_binned_top_thumbnail = visualize_coords(tcga_example, entropy_binned_top_coords)
entropy_binned_top_patches = visualize_patches(tcga_example, entropy_binned_top_coords)

entropy_top_coords = entropy_top_sampling(
    h5_path=h5_example, num_bins=1, num_samples=1000, ignore_k_bins=0
)
print(f"Entropy top coords: {entropy_top_coords.shape}")
entropy_top_thumbnail = visualize_coords(tcga_example, entropy_top_coords)
entropy_top_patches = visualize_patches(tcga_example, entropy_top_coords)

random_coords = random_sample(h5_path=h5_example, num_samples=1000)
print(f"Random coords: {random_coords.shape}")
random_thumbnail = visualize_coords(tcga_example, random_coords)
random_patches = visualize_patches(tcga_example, random_coords)

# One figure row per strategy: marked thumbnail on the left, patch montage on the right.
comparison_panels = [
    ("Entropy", entropy_thumbnail, entropy_patches),
    ("Entropy binned Top", entropy_binned_top_thumbnail, entropy_binned_top_patches),
    ("Entropy Top", entropy_top_thumbnail, entropy_top_patches),
    ("Random", random_thumbnail, random_patches),
]
fig, axs = plt.subplots(4, 2, figsize=(20, 20))
for panel_row, (panel_title, panel_thumbnail, panel_montage) in enumerate(comparison_panels):
    axs[panel_row, 0].imshow(panel_thumbnail)
    axs[panel_row, 0].set_title(panel_title)
    axs[panel_row, 1].imshow(panel_montage)
    axs[panel_row, 1].set_title(f"{panel_title} Patches")
plt.show()


In [None]:
def plot_entropy_distribution(
    h5_path: Path, sampled_coords: torch.Tensor = None, title: str = "Entropy Distribution"
) -> None:
    """Plot a density histogram of the per-patch ``bytecounts`` of a patch file.

    The values are read from the ``bytecounts`` dataset and plotted on an
    axis labelled "Entropy" (presumably the byte counts serve as an entropy
    proxy; confirm against the code that writes the file).

    Args:
        h5_path: HDF5 file providing the ``coords`` and ``bytecounts`` datasets.
        sampled_coords: Optional (x, y) pairs (tensor, array or nested
            sequence). When given, only patches whose coordinates appear in
            ``coords`` are plotted; unmatched pairs are skipped and, for
            duplicated coordinates in the file, the first occurrence is used.
        title: Title of the figure.

    Raises:
        ValueError: If ``coords`` and ``bytecounts`` differ in length.
    """
    with h5py.File(h5_path, "r") as f:
        coords = torch.tensor(f["coords"][:], dtype=torch.long)
        bytecounts = torch.tensor(f["bytecounts"][:], dtype=torch.float)
    if bytecounts.shape[0] != coords.shape[0]:
        raise ValueError("coords and bytecounts must have the same length")
    if bytecounts.ndim > 1:
        # Several values per patch: collapse them to their mean.
        bytecounts = torch.mean(bytecounts, dim=1)

    if sampled_coords is not None:
        # as_tensor avoids the copy-construct warning torch.tensor() emits
        # when it is handed an existing tensor.
        sampled = torch.as_tensor(sampled_coords, dtype=torch.long)

        # Map each (x, y) to its first row index once, instead of scanning
        # the whole coordinate table for every sampled pair.
        first_index = {}
        for row_index, row in enumerate(coords.tolist()):
            first_index.setdefault((row[0], row[1]), row_index)

        indices = []
        for row in sampled.tolist():
            key = (row[0], row[1])
            if key in first_index:
                indices.append(first_index[key])
        bytecounts = bytecounts[torch.tensor(indices, dtype=torch.long)]

    fig, ax = plt.subplots(figsize=(20, 10))
    # Hand matplotlib a NumPy array rather than relying on implicit conversion.
    ax.hist(bytecounts.numpy(), bins=100, density=True)
    ax.set_xlabel("Entropy")
    ax.set_ylabel("Density")
    ax.set_title(title)
    plt.show()

# One histogram for all tissue patches (no filter), then one per sampling strategy.
for sampled_subset, plot_title in [
    (None, "Entropy Distribution of all tissue patches"),
    (entropy_coords, "Entropy Distribution of entropy sampled patches"),
    (entropy_binned_top_coords, "Entropy Distribution of entropy binned top sampled patches"),
    (entropy_top_coords, "Entropy Distribution of entropy top sampled patches"),
    (random_coords, "Entropy Distribution of random sampled patches"),
]:
    plot_entropy_distribution(h5_example, sampled_coords=sampled_subset, title=plot_title)
