# Setup environment

In [1]:
# install openslide dependencies
!sudo apt-get install openslide-tools
!sudo apt-get install python-openslide
!pip install openslide-python

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libopenslide0
Suggested packages:
  libtiff-tools
The following NEW packages will be installed:
  libopenslide0 openslide-tools
0 upgraded, 2 newly installed, 0 to remove and 38 not upgraded.
Need to get 104 kB of archives.
After this operation, 297 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopenslide0 amd64 3.4.1+dfsg-5build1 [89.8 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 openslide-tools amd64 3.4.1+dfsg-5build1 [13.8 kB]
Fetched 104 kB in 0s (208 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 2.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readli

In [2]:
# install flamingo and histogpt
!pip install flamingo-pytorch --no-deps
!pip install git+https://github.com/marrlab/HistoGPT

Collecting flamingo-pytorch
  Downloading flamingo_pytorch-0.1.2-py3-none-any.whl (7.8 kB)
Installing collected packages: flamingo-pytorch
Successfully installed flamingo-pytorch-0.1.2
Collecting git+https://github.com/marrlab/HistoGPT
  Cloning https://github.com/marrlab/HistoGPT to /tmp/pip-req-build-c8vgjkpd
  Running command git clone --filter=blob:none --quiet https://github.com/marrlab/HistoGPT /tmp/pip-req-build-c8vgjkpd
  Resolved https://github.com/marrlab/HistoGPT to commit 93095662228fb52d756eefaefe4da8a67406f7a5
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting einops>=0.4 (from histogpt==1.1.1)
  Downloading einops-0.7.0-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops-exts (from histogpt==1.1.1)
  Downloading einops_exts-0.0.4-py3-none-any.whl (3.9 kB)
Collecting openai>=1.14.0 (from histogpt==1.1.1)
  Downloading openai-1.14.0-py3-none-any

In [3]:
# check whether to use a gpu or cpu
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


# Simple examples

In [4]:
# make a forward pass through the model
from transformers import BioGptConfig
from histogpt.models import HistoGPTForCausalLM, PerceiverResamplerConfig

histogpt = HistoGPTForCausalLM(BioGptConfig(), PerceiverResamplerConfig())
histogpt = histogpt.to(device)

text = torch.randint(0, 42384, (1, 256)).to(device)
image = torch.rand(1, 1024, 768).to(device)

print(histogpt(text, image).logits.size())

torch.Size([1, 256, 42384])


In [5]:
# generate text autoregressively
from histogpt.helpers.inference import generate

output = generate(
    model=histogpt,
    prompt=torch.randint(0, 42384, (1, 2)),
    image=torch.rand(1, 2, 768),
    length=256,
    top_k=40,
    top_p=0.95,
    temp=0.7,
    device=device
)

print(output.size())

                                                 

torch.Size([1, 258])




# Generate reports from features

In [6]:
# download model weights
!wget https://huggingface.co/marr-peng-lab/histogpt/resolve/main/histogpt-1b-6k-pruned.pth?download=true

--2024-03-15 18:59:42--  https://huggingface.co/marr-peng-lab/histogpt/resolve/main/histogpt-1b-6k-pruned.pth?download=true
Resolving huggingface.co (huggingface.co)... 18.239.50.80, 18.239.50.49, 18.239.50.16, ...
Connecting to huggingface.co (huggingface.co)|18.239.50.80|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.huggingface.co/repos/f6/8f/f68faf0906e39e8c3590cdbdd523457dc01bcea2a52d9de48cd7b06821eaac6a/16835f1069ffcfb5b379f3d1423fbf3d99a679d1b426e7b28c4604c8e1cd6956?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27histogpt-1b-6k-pruned.pth%3B+filename%3D%22histogpt-1b-6k-pruned.pth%22%3B&Expires=1710788383&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxMDc4ODM4M319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Y2LzhmL2Y2OGZhZjA5MDZlMzllOGMzNTkwY2RiZGQ1MjM0NTdkYzAxYmNlYTJhNTJkOWRlNDhjZDdiMDY4MjFlYWFjNmEvMTY4MzVmMTA2OWZmY2ZiNWIzNzlmM2QxNDIzZm

In [7]:
# download example features
!wget https://huggingface.co/marr-peng-lab/histogpt/resolve/main/2023-03-06%2023.51.44.h5?download=true

--2024-03-15 19:00:04--  https://huggingface.co/marr-peng-lab/histogpt/resolve/main/2023-03-06%2023.51.44.h5?download=true
Resolving huggingface.co (huggingface.co)... 18.239.50.49, 18.239.50.80, 18.239.50.16, ...
Connecting to huggingface.co (huggingface.co)|18.239.50.49|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.huggingface.co/repos/f6/8f/f68faf0906e39e8c3590cdbdd523457dc01bcea2a52d9de48cd7b06821eaac6a/72aaa4f690facfa0b02ffcec2b327e480933e134faf8633307097273294f6b49?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%272023-03-06%252023.51.44.h5%3B+filename%3D%222023-03-06+23.51.44.h5%22%3B&Expires=1710788404&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxMDc4ODQwNH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Y2LzhmL2Y2OGZhZjA5MDZlMzllOGMzNTkwY2RiZGQ1MjM0NTdkYzAxYmNlYTJhNTJkOWRlNDhjZDdiMDY4MjFlYWFjNmEvNzJhYWE0ZjY5MGZhY2ZhMGIwMmZmY2VjMmIzMjdlN

In [8]:
# load model weights
PATH = '/content/histogpt-1b-6k-pruned.pth?download=true'
state_dict = torch.load(PATH, map_location=device)
histogpt.load_state_dict(state_dict, strict=True)

<All keys matched successfully>

In [10]:
# get text prompt and image features
import h5py
from transformers import BioGptTokenizer

tokenizer = BioGptTokenizer.from_pretrained("microsoft/biogpt")

prompt = 'Final diagnosis:'
prompt = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0).to(device)

with h5py.File('/content/2023-03-06 23.51.44.h5?download=true', 'r') as f:
    features = f['feats'][:]
    features = torch.tensor(features).unsqueeze(0).to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/927k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/696k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/595 [00:00<?, ?B/s]

In [13]:
# generate the pathology report
output = generate(
    model=histogpt,
    prompt=prompt,
    image=features,
    length=256,
    top_k=40,
    top_p=0.95,
    temp=0.7,
    device=device
)

decoded = tokenizer.decode(output[0, 1:])
print(decoded)

                                                

Final diagnosis: Basal cell carcinoma. Microscopic findings: A punch biopsy reveals a flattened epidermis, from which branching basaloid tumor cell clusters proliferate into the upper corium. A distinctive palisade arrangement is observed in the peripheral area, along with peritumoral stroma induction. Critical findings: Superficial basal cell carcinoma is present with a tumor thickness of 0. 3 mm.




# Generate reports from images

In [14]:
# download weights of feature extractor
!wget https://huggingface.co/marr-peng-lab/histogpt/resolve/main/ctranspath.pth?download=true

--2024-03-15 19:02:15--  https://huggingface.co/marr-peng-lab/histogpt/resolve/main/ctranspath.pth?download=true
Resolving huggingface.co (huggingface.co)... 18.239.50.16, 18.239.50.103, 18.239.50.49, ...
Connecting to huggingface.co (huggingface.co)|18.239.50.16|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.huggingface.co/repos/f6/8f/f68faf0906e39e8c3590cdbdd523457dc01bcea2a52d9de48cd7b06821eaac6a/7c998680060c8743551a412583fac689db43cec07053b72dfec6dcd810113539?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27ctranspath.pth%3B+filename%3D%22ctranspath.pth%22%3B&Expires=1710788535&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxMDc4ODUzNX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Y2LzhmL2Y2OGZhZjA5MDZlMzllOGMzNTkwY2RiZGQ1MjM0NTdkYzAxYmNlYTJhNTJkOWRlNDhjZDdiMDY4MjFlYWFjNmEvN2M5OTg2ODAwNjBjODc0MzU1MWE0MTI1ODNmYWM2ODlkYjQzY2VjMDcwNTNiNzJkZmVjNm

In [15]:
# download example slides
!wget https://huggingface.co/marr-peng-lab/histogpt/resolve/main/2023-03-06%2023.51.44.ndpi?download=true

--2024-03-15 19:02:22--  https://huggingface.co/marr-peng-lab/histogpt/resolve/main/2023-03-06%2023.51.44.ndpi?download=true
Resolving huggingface.co (huggingface.co)... 18.239.50.16, 18.239.50.103, 18.239.50.49, ...
Connecting to huggingface.co (huggingface.co)|18.239.50.16|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.huggingface.co/repos/f6/8f/f68faf0906e39e8c3590cdbdd523457dc01bcea2a52d9de48cd7b06821eaac6a/0e1fad3c726e6e12d362f40f694adb76b8b293dd3a2f35e7a07305cb574cb2ca?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%272023-03-06%252023.51.44.ndpi%3B+filename%3D%222023-03-06+23.51.44.ndpi%22%3B&Expires=1710788542&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxMDc4ODU0Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Y2LzhmL2Y2OGZhZjA5MDZlMzllOGMzNTkwY2RiZGQ1MjM0NTdkYzAxYmNlYTJhNTJkOWRlNDhjZDdiMDY4MjFlYWFjNmEvMGUxZmFkM2M3MjZlNmUxMmQzNjJmNDBmNj

In [16]:
# start patching and extracting features
import os
import shutil
from histogpt.helpers.patching import main, PatchingConfigs

try:
  os.mkdir('/content/slide_folder')
  os.mkdir('/content/save_folder')
  shutil.move("/content/2023-03-06 23.51.44.ndpi?download=true", "/content/slide_folder/2023-03-06 23.51.44.ndpi")
except Exception:
  pass

configs = PatchingConfigs()
configs.slide_path = '/content/slide_folder'
configs.save_path = '/content/save_folder'
configs.model_path = '/content/ctranspath.pth?download=true'
configs.patch_size = 256
configs.white_thresh = [170, 185, 175]
configs.edge_threshold = 2
configs.resolution_in_mpp = 0.0
configs.downscaling_factor = 4.0
configs.batch_size = 16

main(configs)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
slides:   0%|          | 0/1 [00:00<?, ?it/s]
2023-03-06 23.51.44_0:   0%|          | 0/61 [00:00<?, ?it/s][A
                                                     

Time taken:  23.429484021000007 seconds




In [17]:
# having the feature vectors, we can proceed as above
import h5py

with h5py.File('/content/save_folder/h5_files/256px_ctranspath_0.0mpp_4.0xdown_normal/2023-03-06 23.51.44.h5', 'r') as f:
    features = f['feats'][:]
    print(features.shape)

(811, 768)


In [18]:
# compare with our previously extracted features
!wget https://huggingface.co/marr-peng-lab/histogpt/resolve/main/2023-03-06%2023.51.44.h5?download=true

with h5py.File('/content/2023-03-06 23.51.44.h5?download=true', 'r') as f:
    features = f['feats'][:]
    print(features.shape)

--2024-03-15 19:03:35--  https://huggingface.co/marr-peng-lab/histogpt/resolve/main/2023-03-06%2023.51.44.h5?download=true
Resolving huggingface.co (huggingface.co)... 18.239.50.16, 18.239.50.49, 18.239.50.80, ...
Connecting to huggingface.co (huggingface.co)|18.239.50.16|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.huggingface.co/repos/f6/8f/f68faf0906e39e8c3590cdbdd523457dc01bcea2a52d9de48cd7b06821eaac6a/72aaa4f690facfa0b02ffcec2b327e480933e134faf8633307097273294f6b49?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%272023-03-06%252023.51.44.h5%3B+filename%3D%222023-03-06+23.51.44.h5%22%3B&Expires=1710788616&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxMDc4ODYxNn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Y2LzhmL2Y2OGZhZjA5MDZlMzllOGMzNTkwY2RiZGQ1MjM0NTdkYzAxYmNlYTJhNTJkOWRlNDhjZDdiMDY4MjFlYWFjNmEvNzJhYWE0ZjY5MGZhY2ZhMGIwMmZmY2VjMmIzMjdlN

# Load the largest, most powerful model

In [19]:
# download model weights and configs
!wget https://huggingface.co/marr-peng-lab/histogpt/resolve/main/histogpt-3b-6k-pruned.pth?download=true
!wget https://huggingface.co/marr-peng-lab/histogpt/resolve/main/biogpt-large-config.json?download=true

--2024-03-15 19:03:39--  https://huggingface.co/marr-peng-lab/histogpt/resolve/main/histogpt-3b-6k-pruned.pth?download=true
Resolving huggingface.co (huggingface.co)... 18.239.50.16, 18.239.50.49, 18.239.50.80, ...
Connecting to huggingface.co (huggingface.co)|18.239.50.16|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.huggingface.co/repos/f6/8f/f68faf0906e39e8c3590cdbdd523457dc01bcea2a52d9de48cd7b06821eaac6a/6602576908ce06805c2b324ef15439f1e492246d8d241c4e5ef9ecf3f0292f65?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27histogpt-3b-6k-pruned.pth%3B+filename%3D%22histogpt-3b-6k-pruned.pth%22%3B&Expires=1710788619&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxMDc4ODYxOX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Y2LzhmL2Y2OGZhZjA5MDZlMzllOGMzNTkwY2RiZGQ1MjM0NTdkYzAxYmNlYTJhNTJkOWRlNDhjZDdiMDY4MjFlYWFjNmEvNjYwMjU3NjkwOGNlMDY4MDVjMmIzMjRlZjE1ND

In [20]:
# use the largest version of histogpt only if you have enough vram
import json

PATH = '/content/biogpt-large-config.json?download=true'
biogpt_config = BioGptConfig.from_pretrained(PATH)

PATH = '/content/histogpt-3b-6k-pruned.pth?download=true'
state_dict = torch.load(PATH, map_location=device)

histogpt = HistoGPTForCausalLM(biogpt_config, PerceiverResamplerConfig())
histogpt.load_state_dict(state_dict, strict=True)

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 15.77 GiB of which 20.38 MiB is free. Process 18656 has 15.75 GiB memory in use. Of the allocated memory 15.14 GiB is allocated by PyTorch, and 242.78 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)