In [31]:
import os
# insert token
hf_token = os.getenv("HF_TOKEN")
assert hf_token, "HF_TOKEN is not set"

# Authenticate to access Google Cloud resources
from google.colab import auth
auth.authenticate_user()


In [14]:
!apt-get install -y python3-dev
!pip install --no-binary :all: crcmod

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
python3-dev is already the newest version (3.10.6-1~22.04.1).
python3-dev set to manually installed.
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.
Collecting crcmod
  Downloading crcmod-1.7.tar.gz (89 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m89.7/89.7 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: crcmod
  Building wheel for crcmod (setup.py) ... [?25l[?25hdone
  Created wheel for crcmod: filename=crcmod-1.7-cp311-cp311-linux_x86_64.whl size=31658 sha256=43a34b56d31267cf01452ffcb15533388c3095d49d976f7d64c06d3b2ebf9932
  Stored in directory: /root/.cache/pip/wheels/23/94/7a/8cb7d14597e6395ce969933f01aed9ea8fa5f5b4d4c8a61e99
Successfully built crcmod
Installing collected 

In [15]:

from google.colab import auth
auth.authenticate_user()

from google.cloud import storage
from pathlib import Path
import os

In [10]:
import os
from pathlib import Path
import torch
import timm
from PIL import Image
from torchvision import transforms
from tqdm import tqdm
import subprocess

# Set paths
gcs_bucket = "bracs-dataset-bucket"
tile_root_gcs = f"gs://{gcs_bucket}/Tiles/train"
embedding_root_gcs = f"gs://{gcs_bucket}/Embeddings/train"
tile_root_local = Path("/content/tiles/train")
embedding_root_local = Path("/content/embeddings/train")

tile_root_local.mkdir(parents=True, exist_ok=True)
embedding_root_local.mkdir(parents=True, exist_ok=True)

# Load model
tile_encoder = timm.create_model("hf_hub:prov-gigapath/prov-gigapath", pretrained=True).cuda().eval()
print("‚úÖ Tile Encoder loaded.")
print("üßÆ Total parameters:", sum(p.numel() for p in tile_encoder.parameters()))

# Image transform
transform = transforms.Compose([
    transforms.Resize(256, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])


‚úÖ Tile Encoder loaded.
üßÆ Total parameters: 1134953984


In [28]:
def encode_slide_tiles(slide_id: str):
    slide_tile_dir = tile_root_local / slide_id
    save_path = embedding_root_local / slide_id / f"{slide_id}_embeddings.pt"
    save_path.parent.mkdir(parents=True, exist_ok=True)

    image_paths = sorted(slide_tile_dir.glob("*.png"))
    print(f"üìÇ {slide_id}: {len(image_paths)} tiles")

    all_embeddings = []
    all_coords = []

    for img_path in tqdm(image_paths, desc=f"üß† Encoding {slide_id}"):
        img = Image.open(img_path).convert("RGB")
        x = transform(img).unsqueeze(0).cuda()

        with torch.no_grad():
            embed = tile_encoder(x).squeeze().cpu()

        # Extract (x, y) from filename: "x1234_y5678.png"
        name_parts = img_path.stem.split("_")
        x_coord = int(name_parts[0][1:])
        y_coord = int(name_parts[1][1:])

        all_embeddings.append(embed)
        all_coords.append([x_coord, y_coord])

        print(f"üìè {img_path.name}: embedding shape {embed.shape}")

    # Stack and save
    embeddings_tensor = torch.stack(all_embeddings)
    coords_tensor = torch.tensor(all_coords)

    print(f"üìä {slide_id}: embeddings shape {embeddings_tensor.shape}, coords shape {coords_tensor.shape}")

    torch.save({
        "embeddings": embeddings_tensor,
        "coords": coords_tensor
    }, save_path)

    print(f"‚úÖ Saved slide embeddings to {save_path}")


In [23]:
def run_encoder_pipeline_from_gcs():
    # List all subfolders in the GCS bucket
    list_command = f"gsutil ls {tile_root_gcs}/"
    result = subprocess.run(list_command.split(), capture_output=True, text=True)
    slide_dirs = [line.strip().rstrip("/") for line in result.stdout.strip().split("\n")]

    for slide_path in slide_dirs:
        slide_id = Path(slide_path).name
        print(f"\nüîΩ Downloading tiles for {slide_id}...")

        # Download to /content/tiles/train/BRACS_xxx
        local_slide_dir = tile_root_local / slide_id
        subprocess.run([
    "gsutil", "-m", "cp", "-r", str(slide_path), str(local_slide_dir.parent)
], check=True)

        # Run inference
        encode_slide_tiles(slide_id)

        # Upload embeddings
        gcs_target = f"{embedding_root_gcs}/{slide_id}"
        subprocess.run(["gsutil", "-m", "cp", "-r", str(embedding_root_local / slide_id), gcs_target], check=True)
        print(f"‚òÅÔ∏è Uploaded embeddings for {slide_id} to {gcs_target}")

        # Cleanup
        subprocess.run(["rm", "-rf", str(local_slide_dir)])
        subprocess.run(["rm", "-rf", str(embedding_root_local / slide_id)])


In [29]:
run_encoder_pipeline_from_gcs()



üîΩ Downloading tiles for BRACS_1003728...
üìÇ BRACS_1003728: 109 tiles


üß† Encoding BRACS_1003728:   2%|‚ñè         | 2/109 [00:00<00:06, 16.78it/s]

üìè x10304_y46144.png: embedding shape torch.Size([1536])
üìè x10304_y46592.png: embedding shape torch.Size([1536])
üìè x107968_y61824.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:   4%|‚ñé         | 4/109 [00:00<00:05, 18.28it/s]

üìè x114240_y57792.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:   6%|‚ñã         | 7/109 [00:00<00:05, 19.66it/s]

üìè x116032_y63168.png: embedding shape torch.Size([1536])
üìè x122752_y11200.png: embedding shape torch.Size([1536])
üìè x122752_y12096.png: embedding shape torch.Size([1536])
üìè x122752_y12544.png: embedding shape torch.Size([1536])
üìè x123648_y8512.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  12%|‚ñà‚ñè        | 13/109 [00:00<00:04, 20.48it/s]

üìè x124096_y13440.png: embedding shape torch.Size([1536])
üìè x124096_y13888.png: embedding shape torch.Size([1536])
üìè x124096_y14336.png: embedding shape torch.Size([1536])
üìè x124096_y15232.png: embedding shape torch.Size([1536])
üìè x124096_y33152.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  15%|‚ñà‚ñç        | 16/109 [00:00<00:04, 20.60it/s]

üìè x124544_y12544.png: embedding shape torch.Size([1536])
üìè x124544_y13888.png: embedding shape torch.Size([1536])
üìè x124544_y14336.png: embedding shape torch.Size([1536])
üìè x124544_y14784.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  17%|‚ñà‚ñã        | 19/109 [00:00<00:04, 20.73it/s]

üìè x124544_y15232.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  20%|‚ñà‚ñà        | 22/109 [00:01<00:04, 20.71it/s]

üìè x124544_y16128.png: embedding shape torch.Size([1536])
üìè x124544_y16576.png: embedding shape torch.Size([1536])
üìè x124992_y13440.png: embedding shape torch.Size([1536])
üìè x124992_y14336.png: embedding shape torch.Size([1536])
üìè x124992_y14784.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  26%|‚ñà‚ñà‚ñå       | 28/109 [00:01<00:03, 20.84it/s]

üìè x124992_y15232.png: embedding shape torch.Size([1536])
üìè x124992_y15680.png: embedding shape torch.Size([1536])
üìè x124992_y16128.png: embedding shape torch.Size([1536])
üìè x124992_y16576.png: embedding shape torch.Size([1536])
üìè x124992_y17024.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  28%|‚ñà‚ñà‚ñä       | 31/109 [00:01<00:03, 20.85it/s]

üìè x125440_y12992.png: embedding shape torch.Size([1536])
üìè x125440_y13888.png: embedding shape torch.Size([1536])
üìè x125440_y14336.png: embedding shape torch.Size([1536])
üìè x125440_y14784.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  31%|‚ñà‚ñà‚ñà       | 34/109 [00:01<00:03, 20.89it/s]

üìè x125888_y13440.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  34%|‚ñà‚ñà‚ñà‚ñç      | 37/109 [00:01<00:03, 20.92it/s]

üìè x125888_y13888.png: embedding shape torch.Size([1536])
üìè x125888_y14336.png: embedding shape torch.Size([1536])
üìè x15680_y58688.png: embedding shape torch.Size([1536])
üìè x16128_y59584.png: embedding shape torch.Size([1536])
üìè x22400_y55104.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  39%|‚ñà‚ñà‚ñà‚ñâ      | 43/109 [00:02<00:03, 20.95it/s]

üìè x22400_y58240.png: embedding shape torch.Size([1536])
üìè x23296_y56000.png: embedding shape torch.Size([1536])
üìè x23296_y59584.png: embedding shape torch.Size([1536])
üìè x24192_y58240.png: embedding shape torch.Size([1536])
üìè x24192_y58688.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 46/109 [00:02<00:03, 20.96it/s]

üìè x24192_y59136.png: embedding shape torch.Size([1536])
üìè x24192_y59584.png: embedding shape torch.Size([1536])
üìè x24192_y60032.png: embedding shape torch.Size([1536])
üìè x25088_y59584.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  45%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 49/109 [00:02<00:02, 20.98it/s]

üìè x28672_y60032.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 52/109 [00:02<00:02, 20.88it/s]

üìè x32256_y39872.png: embedding shape torch.Size([1536])
üìè x33600_y45248.png: embedding shape torch.Size([1536])
üìè x33600_y9408.png: embedding shape torch.Size([1536])
üìè x34048_y5824.png: embedding shape torch.Size([1536])
üìè x34048_y8512.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 58/109 [00:02<00:02, 20.89it/s]

üìè x34048_y8960.png: embedding shape torch.Size([1536])
üìè x34048_y9408.png: embedding shape torch.Size([1536])
üìè x34496_y5824.png: embedding shape torch.Size([1536])
üìè x34496_y8064.png: embedding shape torch.Size([1536])
üìè x34944_y5824.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 61/109 [00:02<00:02, 20.87it/s]

üìè x35840_y37632.png: embedding shape torch.Size([1536])
üìè x35840_y38528.png: embedding shape torch.Size([1536])
üìè x3584_y24640.png: embedding shape torch.Size([1536])
üìè x36288_y37632.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 64/109 [00:03<00:02, 20.87it/s]

üìè x36288_y38080.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 67/109 [00:03<00:02, 20.87it/s]

üìè x36288_y6272.png: embedding shape torch.Size([1536])
üìè x36288_y6720.png: embedding shape torch.Size([1536])
üìè x36736_y11648.png: embedding shape torch.Size([1536])
üìè x36736_y12096.png: embedding shape torch.Size([1536])
üìè x36736_y6720.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 73/109 [00:03<00:01, 20.91it/s]

üìè x37184_y11200.png: embedding shape torch.Size([1536])
üìè x37184_y11648.png: embedding shape torch.Size([1536])
üìè x37184_y12096.png: embedding shape torch.Size([1536])
üìè x37632_y10752.png: embedding shape torch.Size([1536])
üìè x37632_y11200.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 76/109 [00:03<00:01, 20.95it/s]

üìè x37632_y11648.png: embedding shape torch.Size([1536])
üìè x38080_y10752.png: embedding shape torch.Size([1536])
üìè x41664_y4928.png: embedding shape torch.Size([1536])
üìè x41664_y5376.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 79/109 [00:03<00:01, 20.97it/s]

üìè x42112_y4928.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 82/109 [00:03<00:01, 20.90it/s]

üìè x42112_y5376.png: embedding shape torch.Size([1536])
üìè x42560_y4480.png: embedding shape torch.Size([1536])
üìè x43904_y43456.png: embedding shape torch.Size([1536])
üìè x43904_y43904.png: embedding shape torch.Size([1536])
üìè x44352_y43456.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 88/109 [00:04<00:01, 20.94it/s]

üìè x44352_y43904.png: embedding shape torch.Size([1536])
üìè x44800_y43904.png: embedding shape torch.Size([1536])
üìè x4480_y25536.png: embedding shape torch.Size([1536])
üìè x4480_y25984.png: embedding shape torch.Size([1536])
üìè x46144_y45248.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 91/109 [00:04<00:00, 20.92it/s]

üìè x46592_y45248.png: embedding shape torch.Size([1536])
üìè x46592_y45696.png: embedding shape torch.Size([1536])
üìè x4928_y25984.png: embedding shape torch.Size([1536])
üìè x5824_y25088.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 94/109 [00:04<00:00, 20.89it/s]

üìè x6720_y26432.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 97/109 [00:04<00:00, 20.90it/s]

üìè x71232_y59136.png: embedding shape torch.Size([1536])
üìè x71680_y58688.png: embedding shape torch.Size([1536])
üìè x71680_y59136.png: embedding shape torch.Size([1536])
üìè x8064_y26880.png: embedding shape torch.Size([1536])
üìè x8064_y30464.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 103/109 [00:04<00:00, 20.89it/s]

üìè x8064_y33152.png: embedding shape torch.Size([1536])
üìè x8512_y29568.png: embedding shape torch.Size([1536])
üìè x8512_y30016.png: embedding shape torch.Size([1536])
üìè x8512_y30912.png: embedding shape torch.Size([1536])
üìè x90048_y51968.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 106/109 [00:05<00:00, 20.88it/s]

üìè x90048_y52416.png: embedding shape torch.Size([1536])
üìè x94080_y30016.png: embedding shape torch.Size([1536])
üìè x9856_y33152.png: embedding shape torch.Size([1536])
üìè x9856_y46144.png: embedding shape torch.Size([1536])


üß† Encoding BRACS_1003728: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 109/109 [00:05<00:00, 20.78it/s]

üìè x9856_y46592.png: embedding shape torch.Size([1536])
üìä BRACS_1003728: embeddings shape torch.Size([109, 1536]), coords shape torch.Size([109, 2])
‚úÖ Saved slide embeddings to /content/embeddings/train/BRACS_1003728/BRACS_1003728_embeddings.pt





‚òÅÔ∏è Uploaded embeddings for BRACS_1003728 to gs://bracs-dataset-bucket/Embeddings/train/BRACS_1003728
