In [1]:
# System and Python dependencies
!apt-get install -y openslide-tools
!pip install openslide-python scikit-image timm pandas tqdm --quiet

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
openslide-tools is already the newest version (3.4.1+dfsg-5build1).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.


In [2]:
# Authenticate to access Google Cloud resources
from google.colab import auth
auth.authenticate_user()

# Install OpenSlide and dependencies
!apt-get install -y openslide-tools
!pip install openslide-python

# Remove the old (slow) Python-only version
!pip uninstall -y crcmod

# Install required build tools and compile crcmod from source
!apt-get install -y python3-dev
!pip install --no-binary :all: crcmod

bucket_path = "gs://bracs-dataset-bucket/BRACS/BRACS_WSI/test/Group_AT/Type_ADH/BRACS_1892.svs"
local_path = "/content/BRACS_1892.svs"

!gsutil -m cp gs://bracs-dataset-bucket/BRACS/BRACS_WSI/test/Group_AT/Type_ADH/BRACS_1892.svs /content/BRACS_1892.svs




Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
openslide-tools is already the newest version (3.4.1+dfsg-5build1).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.
Found existing installation: crcmod 1.7
Uninstalling crcmod-1.7:
  Successfully uninstalled crcmod-1.7
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
python3-dev is already the newest version (3.10.6-1~22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.
Collecting crcmod
  Using cached crcmod-1.7-cp311-cp311-linux_x86_64.whl
Installing collected packages: crcmod
Successfully installed crcmod-1.7
Copying gs://bracs-dataset-bucket/BRACS/BRACS_WSI/test/Group_AT/Type_ADH/BRACS_1892.svs...
| [1/1 files][  2.2 GiB/  2.2 GiB] 100% Done  69.4 MiB/s ETA 00:00:00           
Operation completed over 1 objects/2.2 GiB.                                      


In [4]:
!git clone https://github.com/prov-gigapath/prov-gigapath.git
%cd prov-gigapath
#Remove xformers from requirements
!grep -v "xformers" requirements.txt > requirements_clean.txt
!pip install -r requirements_clean.txt

fatal: destination path 'prov-gigapath' already exists and is not an empty directory.
/content/prov-gigapath


In [4]:

!pip install setuptools==v45.3.0
!sudo apt-get install openslide-tools
!sudo apt-get install python-openslide
!pip install openslide-python
!pip install monai
!pip install torchmetrics
!pip install numpy



Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
openslide-tools is already the newest version (3.4.1+dfsg-5build1).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
E: Unable to locate package python-openslide


In [5]:
from pathlib import Path
from gigapath.preprocessing.data.create_tiles_dataset import process_slide
import pandas as pd

def tile_one_slide(slide_file: str, save_dir: str, level: int = 0, tile_size: int = 256):
    """
    Cut a single whole-slide image into tiles with GigaPath's `process_slide`.

    Args:
        slide_file: Path to the slide file; its stem is used as the slide id.
        save_dir: Root folder. `process_slide` is given
            ``<save_dir>/<slide_id>/output`` as its output directory and
            ``<save_dir>/<slide_id>/thumb`` as its thumbnail directory.
        level: Pyramid level forwarded to `process_slide`.
        tile_size: Tile edge length in pixels, forwarded to `process_slide`.

    Returns:
        The value returned by `process_slide`, used here as the directory
        that contains ``dataset.csv``.
    """
    slide_id = Path(slide_file).stem
    slide_root = Path(save_dir) / slide_id

    print(f"🔄 Tiling {slide_id} at level {level} with tile size {tile_size}px")

    tiling_options = dict(
        level=level,
        margin=0,
        tile_size=tile_size,
        foreground_threshold=None,
        occupancy_threshold=0.0,  # no occupancy cut here; Otsu filtering happens later
        output_dir=slide_root / "output",
        thumbnail_dir=slide_root / "thumb",
        tile_progress=True,
    )
    tile_dir = process_slide(
        {"image": slide_file, "slide_id": slide_id, "metadata": {}},
        **tiling_options,
    )

    # dataset.csv is read only to report how many tiles were written.
    tile_count = len(pd.read_csv(tile_dir / "dataset.csv"))
    print(f"✅ Tiling complete: {tile_count} tiles saved to: {tile_dir}")
    return tile_dir


  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [None]:
# Slide to tile and the root folder that receives the results
# (change both if your files live elsewhere).
slide_path = "/content/BRACS_1892.svs"
output_dir = "/content/tiles"

# This run uses pyramid level 0 with 2000 px tiles. An earlier note on this
# cell said "Level 1 ≈ 10×"; that describes an alternative setting, not the
# value passed here.
tile_dir = tile_one_slide(slide_path, output_dir, level=0, tile_size=2000)


🔄 Tiling BRACS_1892 at level 0 with tile size 2000px
('slide_id', 'tile_id', 'image', 'label', 'tile_x', 'tile_y', 'occupancy')


In [None]:
import timm
import torch
import glob
from tqdm import tqdm
from gigapath.pipeline import TileEncodingDataset, load_tile_encoder_transforms
from torch.utils.data import DataLoader
from pathlib import Path

def encode_slide_tiles_with_gigapath(
    tile_folder: str,
    save_path: str = "tile_embeddings.pt",
    batch_size: int = 128,
    num_workers: int = 2,
):
    """
    Run the GigaPath tile encoder over every PNG tile in a folder and save
    the resulting embeddings together with the tile coordinates.

    The encoder runs on CUDA under float16 autocast when a GPU is available;
    otherwise it falls back to CPU in full precision (slow, but it no longer
    crashes on a runtime without a GPU).

    Args:
        tile_folder: Directory containing the ``*.png`` tiles (only files
            directly inside it are picked up, not sub-folders).
        save_path: Where to store the ``torch.save`` output. Missing parent
            directories are created.
        batch_size: Batch size for inference.
        num_workers: Number of DataLoader worker processes (0 loads in the
            main process).

    Raises:
        AssertionError: If ``tile_folder`` contains no PNG files.

    The saved file is a dict with keys ``"embeddings"`` (one row per tile)
    and ``"coords"`` (the ``"coords"`` entries yielded by
    ``TileEncodingDataset``, concatenated in the same order).
    """
    # Local import so the function does not rely on an extra notebook-level import.
    from contextlib import nullcontext

    # Step 1: Collect tile paths
    tile_paths = sorted(glob.glob(str(Path(tile_folder) / "*.png")))
    assert len(tile_paths) > 0, f"No PNG tiles found in: {tile_folder}"

    print(f"📂 Found {len(tile_paths)} tiles in {tile_folder}")

    # Create the destination folder up front so a missing directory cannot
    # throw away a finished inference run at the final torch.save.
    Path(save_path).parent.mkdir(parents=True, exist_ok=True)

    # Step 2: Load tile encoder model and transform
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    if not use_cuda:
        print("⚠️ CUDA is not available - running the tile encoder on CPU (slow)")

    tile_encoder = timm.create_model("hf_hub:prov-gigapath/prov-gigapath", pretrained=True)
    tile_encoder = tile_encoder.eval().to(device)
    transform = load_tile_encoder_transforms()

    # Step 3: Create dataset and dataloader
    dataset = TileEncodingDataset(tile_paths, transform=transform)
    dataloader = DataLoader(
        dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
    )

    # Step 4: Run inference
    # torch.autocast replaces the deprecated torch.cuda.amp.autocast; mixed
    # precision is only enabled on CUDA.
    if use_cuda:
        autocast_ctx = torch.autocast(device_type="cuda", dtype=torch.float16)
    else:
        autocast_ctx = nullcontext()

    all_embeds, all_coords = [], []

    with torch.no_grad(), autocast_ctx:
        for batch in tqdm(dataloader, desc="🔍 Running tile encoder"):
            imgs = batch["img"].to(device)
            coords = batch["coords"]  # presumably [B, 2] - depends on TileEncodingDataset
            embeddings = tile_encoder(imgs).detach().cpu()
            all_embeds.append(embeddings)
            all_coords.append(coords)

    tile_embeds = torch.cat(all_embeds, dim=0)  # [N, D]
    coords = torch.cat(all_coords, dim=0)       # [N, 2]

    # Step 5: Save to .pt
    torch.save({
        "embeddings": tile_embeds,
        "coords": coords
    }, save_path)

    print(f"✅ Saved tile embeddings to {save_path}")


In [None]:
# Encode the tiles written by the tiling step. `tile_dir` is the folder
# returned by tile_one_slide (the one it reads dataset.csv from), so reusing
# it avoids a hand-typed path that can point at the wrong directory level.
encode_slide_tiles_with_gigapath(
    tile_folder=str(tile_dir),
    save_path="/content/tiles/BRACS_1892/output/tile_embeddings.pt"
)
