In [1]:
# Install torchstain from the wheel attached as a Kaggle input dataset:
# --no-index makes pip resolve only from the --find-links directory.
!pip install torchstain --no-index --find-links=file:///kaggle/input/torchstain
# Import right away so a broken install fails in this cell rather than later.
import torchstain
print("torchstain package successfully imported!")

Looking in links: file:///kaggle/input/torchstain
Processing /kaggle/input/torchstain/torchstain-1.3.0-py3-none-any.whl
Installing collected packages: torchstain
Successfully installed torchstain-1.3.0
torchstain package successfully imported!


In [2]:
# list the contents of the attached pyvips dataset
!ls /kaggle/input/pyvips-python-and-deb-package-gpu
# install the bundled .deb packages (auto-answering prompts, forcing past dependency checks)
!yes | dpkg -i --force-depends /kaggle/input/pyvips-python-and-deb-package-gpu/linux_packages/archives/*.deb
# install the Python wrapper from the local package directory, without a package index
!pip install pyvips -f /kaggle/input/pyvips-python-and-deb-package-gpu/python_packages/ --no-index

linux_packages	python_packages
Selecting previously unselected package apparmor.
(Reading database ... 113818 files and directories currently installed.)
Preparing to unpack .../apparmor_3.0.4-2ubuntu2.2_amd64.deb ...
Unpacking apparmor (3.0.4-2ubuntu2.2) ...
Selecting previously unselected package autoconf.
Preparing to unpack .../autoconf_2.71-2_all.deb ...
Unpacking autoconf (2.71-2) ...
Selecting previously unselected package automake.
Preparing to unpack .../automake_13a1.16.5-1.3_all.deb ...
Unpacking automake (1:1.16.5-1.3) ...
Selecting previously unselected package autotools-dev.
Preparing to unpack .../autotools-dev_20220109.1_all.deb ...
Unpacking autotools-dev (20220109.1) ...
Selecting previously unselected package bzip2-doc.
Preparing to unpack .../bzip2-doc_1.0.8-5build1_all.deb ...
Unpacking bzip2-doc (1.0.8-5build1) ...
Selecting previously unselected package file.
Preparing to unpack .../file_13a5.41-3ubuntu0.1_amd64.deb ...
Unpacking file (1:5.41-3

In [3]:
import os, glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Root of the competition data and the scratch directory that receives the cut tiles.
DATASET_FOLDER = "/kaggle/input/UBC-OCEAN/"
IMAGES_FOLDER = "./test_tiles"

# libvips settings, exported here because pyvips is only imported in a later cell.
# NOTE(review): presumably these cap the worker threads and set the size above which
# libvips spills images to disk -- confirm against the libvips documentation.
os.environ.update({
    'VIPS_CONCURRENCY': '4',
    'VIPS_DISC_THRESHOLD': '15gb',
})

In [4]:
# Class names; the inference loop indexes this list with the arg-max of the
# summed per-tile probabilities.
# NOTE(review): the order is assumed to match the model's output indices -- confirm.
labels = [
    'HGSC',
    'LGSC',
    'EC',
    'CC',
    'MC',
    'Other',
]

In [5]:
import os
import pyvips
import numpy as np
import random
from PIL import Image
# Cut a large image into tiles: random subsampling, black-border filtering and resizing.
def extract_image_tiles(
    p_img, folder, size: int = 2048, scale: float = 0.5,
    drop_thr: float = 0.6, white_thr: int = 240, max_samples: int = 50
) -> list:
    """Cut the image at `p_img` into square tiles and save a random subset as PNG files.

    The image is split on a regular grid of `size` x `size` tiles. Tiles are visited in
    random order; each one is
      * zero-padded to the full tile size when it overlaps the right/bottom image border,
      * cleaned so that pixels whose channels sum to zero become white (255),
      * skipped when the share of pixels with a channel mean above `white_thr`
        is at least `drop_thr`,
      * otherwise resized to ``int(size * scale)`` per side (Lanczos) and written to
        `folder` as ``"<column>-<row>.png"`` using 1-based grid indices.

    Args:
        p_img: path of the source image, opened with pyvips.
        folder: existing directory that receives the tile files.
        size: edge length of a tile in source pixels.
        scale: resize factor applied to every saved tile.
        drop_thr: background fraction at or above which a tile is discarded.
        white_thr: channel-mean value above which a pixel counts as background.
        max_samples: maximum number of tiles to save; a non-int value is treated as a
            fraction of the total number of grid tiles.

    Returns:
        List with the paths of the saved tiles (empty when `max_samples` resolves to
        zero or less, or when every tile was discarded).
    """
    im = pyvips.Image.new_from_file(p_img)
    w = h = size

    idxs = [(y, y + h, x, x + w) for y in range(0, im.height, h) for x in range(0, im.width, w)]
    # random subsample
    max_samples = max_samples if isinstance(max_samples, int) else int(len(idxs) * max_samples)
    if max_samples <= 0:
        # the limit is only checked after a tile is saved, so without this guard
        # a limit of zero would still write one tile
        return []
    random.shuffle(idxs)

    new_size = int(size * scale), int(size * scale)
    files = []
    for y, y_, x, x_ in idxs:
        # crop is clamped to the image bounds; only the first three bands are kept
        tile = im.crop(x, y, min(w, im.width - x), min(h, im.height - y)).numpy()[..., :3]
        if tile.shape[:2] != (h, w):
            # border tile: paste it into the top-left corner of a zero canvas
            partial = tile
            tile_size = (h, w) if tile.ndim == 2 else (h, w, tile.shape[2])
            tile = np.zeros(tile_size, dtype=tile.dtype)
            tile[:partial.shape[0], :partial.shape[1], ...] = partial
        # pure black pixels (including the padding above) are repainted white
        black_bg = np.sum(tile, axis=2) == 0
        tile[black_bg, :] = 255
        mask_bg = np.mean(tile, axis=2) > white_thr
        if np.sum(mask_bg) >= (mask_bg.size * drop_thr):
            continue
        # separate name so the `p_img` argument is not overwritten
        p_tile = os.path.join(folder, f"{x_ // w}-{y_ // h}.png")
        Image.fromarray(tile).resize(new_size, Image.LANCZOS).save(p_tile)
        files.append(p_tile)
        if len(files) >= max_samples:
            break
    return files

In [6]:
# Extract the tiles of one image into its own sub-folder (random subsampling,
# black-border filtering and resizing are done by extract_image_tiles).
def extract_prune_tiles(
    path_img: str, folder: str, size: int = 2048, scale: float = 0.25,
    drop_thr: float = 0.6, max_samples: int = 30
) -> str:
    """Cut `path_img` into tiles stored under ``<folder>/<image name without extension>``.

    The sub-folder is created when missing. `size`, `scale`, `drop_thr` and
    `max_samples` are forwarded unchanged to `extract_image_tiles`.

    Returns:
        Path of the sub-folder holding the tiles.
    """
    print(f"processing: {path_img}")
    image_name = os.path.splitext(os.path.basename(path_img))[0]
    tiles_dir = os.path.join(folder, image_name)
    os.makedirs(tiles_dir, exist_ok=True)
    extract_image_tiles(
        path_img,
        tiles_dir,
        size=size,
        scale=scale,
        drop_thr=drop_thr,
        max_samples=max_samples,
    )
    return tiles_dir

In [7]:
import joblib
from torchvision import transforms
import torchstain
import cv2

# Fitted normalizers keyed by reference-image path, so the reference is loaded and
# fitted once per process instead of once per tile.
_FITTED_STAIN_NORMALIZERS = {}


def _get_stain_normalizer(target_image_path):
    """Return ``(normalizer, to_tensor_255)`` fitted on the reference at `target_image_path` (cached)."""
    cached = _FITTED_STAIN_NORMALIZERS.get(target_image_path)
    if cached is None:
        # NOTE(review): joblib.load unpickles the file -- only use trusted reference files.
        target = joblib.load(target_image_path)

        # ToTensor followed by a multiplication by 255, applied to reference and input alike
        to_tensor_255 = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x * 255)
        ])

        normalizer = torchstain.normalizers.MacenkoNormalizer(backend='torch')
        normalizer.fit(to_tensor_255(target))
        cached = (normalizer, to_tensor_255)
        _FITTED_STAIN_NORMALIZERS[target_image_path] = cached
    return cached


def normalize_stain(image, target_image_path="/kaggle/input/targetimage/target.joblib"):
    """Macenko stain-normalize `image` against a stored reference image.

    Args:
        image: input accepted by torchvision's ``ToTensor``.
        target_image_path: joblib file holding the reference image.

    Returns:
        On success, the normalized image as a float numpy array (normalizer output
        divided by 255). If anything fails (missing reference, normalization
        error, ...), a warning is emitted and `image` is returned unchanged, so the
        surrounding pipeline keeps running.
    """
    try:
        normalizer, to_tensor_255 = _get_stain_normalizer(target_image_path)
        t_to_transform = to_tensor_255(image)

        normalized, _, _ = normalizer.normalize(I=t_to_transform, stains=False)
        normalized = normalized.float() / 255.0
        normalized = normalized.numpy()

    except Exception as err:
        # deliberate best-effort fallback, but no longer silent
        import warnings
        warnings.warn(
            f"stain normalization failed ({err!r}); using the un-normalized image",
            RuntimeWarning,
        )
        normalized = image

    return normalized

In [8]:
from torchvision import transforms as T

# Per-channel mean / std used by the final normalisation step.
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])

# Evaluation-time pipeline, applied to every tile in this order.
VALID_TRANSFORM = T.Compose(transforms=[
    T.CenterCrop(512),                          # central 512 x 512 crop
    normalize_stain,                            # stain normalisation (falls back to the input on failure)
    T.ToTensor(),                               # to torch tensor
    T.Normalize(imagenet_mean, imagenet_std),   # custom
])

In [9]:
import torch
from PIL import Image
from torch.utils.data import Dataset

class TilesFolderDataset(Dataset):
    """Dataset over the tile images stored directly inside one folder.

    Items are single images (no label): the tile is read, reduced to its first three
    channels, cleaned of pure-black pixels and passed through `transforms` when given.
    """

    def __init__(
        self,
        folder: str,
        image_ext: str =  '.png',
        transforms = None
    ):
        """Collect every file in `folder` whose name ends with `image_ext`.

        Args:
            folder: existing directory with the tiles.
            image_ext: file-name suffix of the tiles to include.
            transforms: optional callable applied to each tile (given as a PIL image).
        """
        assert os.path.isdir(folder), f"missing folder: {folder}"
        self.transforms = transforms
        # glob returns files in arbitrary order; sort so indices are reproducible
        self.imgs = sorted(glob.glob(os.path.join(folder, "*" + image_ext)))

    def __getitem__(self, idx: int):
        """Return tile `idx`: the transformed image, or the raw numpy array without transforms."""
        img_path = self.imgs[idx]
        assert os.path.isfile(img_path), f"missing: {img_path}"
        # context manager releases the file handle as soon as the pixels are copied
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img)[..., :3]
        # filter background
        mask = np.sum(img, axis=2) == 0
        img[mask, :] = 255
        # values that look like a 0..1 range are rescaled to 0..255 uint8
        if np.max(img) < 1.5:
            img = np.clip(img * 255, 0, 255).astype(np.uint8)
        # augmentation
        if self.transforms:
            img = self.transforms(Image.fromarray(img))
        return img

    def __len__(self) -> int:
        """Number of tiles found in the folder."""
        return len(self.imgs)

In [10]:
import timm
import torch
import torchvision
import pytorch_lightning as pl
from torch import nn
from torch.nn import functional as F

# Load the checkpoint
# NOTE: the loaded object is used directly as the model in the inference cell (model.eval()),
# i.e. the file stores a whole pickled model rather than a state_dict. torch.load unpickles
# it, so only checkpoints from a trusted source should be loaded this way.
PATH_CKPT = '/kaggle/input/other-model-training-torch/efficientNet.pth'
model =torch.load(PATH_CKPT)



In [11]:
# Test-set metadata (point this at "train.csv" instead to dry-run on training images).
test_csv_path = os.path.join(DATASET_FOLDER, "test.csv")
df_test = pd.read_csv(test_csv_path)

# default label, kept for images from which no tile could be cut
df_test['label'] = 'Other'

print(f"Dataset/test size: {len(df_test)}")
display(df_test.head())

Dataset/test size: 1


Unnamed: 0,image_id,image_width,image_height,label
0,41,28469,16987,Other


In [12]:
# print the sample submission to see the expected columns of the output file
!cat /kaggle/input/UBC-OCEAN/sample_submission.csv

image_id,label
41,HGSC


## Inference

In [13]:
import scipy
import shutil
from torch.utils.data import DataLoader

# Put the model in evaluation mode and move it to the GPU.
model.eval()
model = model.cuda()

# One dict per test image; each starts from the df_test row and gets its 'label' replaced.
submission = []
for _, row in df_test.iterrows():
    row = dict(row)
    # prepare data - cut and load tiles
    folder_tiles = extract_prune_tiles(
        os.path.join(DATASET_FOLDER, "test_images", f"{str(row['image_id'])}.png"),
        IMAGES_FOLDER, size=2048, scale=0.25)
    try:
        dataset = TilesFolderDataset(folder_tiles, transforms=VALID_TRANSFORM)
        if not len(dataset):
            # nothing to predict on: keep the default label already stored in the row
            print (f"seem no tiles were cut for `{folder_tiles}`")
            submission.append(row)
            continue
        dataloader = DataLoader(dataset, batch_size=4, num_workers=10, shuffle=False)
        # iterate over images and collect predictions
        preds = []
        with torch.no_grad():
            for imgs in dataloader:
                pred = model(imgs.cuda())
                preds += pred.cpu().numpy().tolist()
    finally:
        # Always drop the temporary tiles (also on the "no tiles" path and on errors).
        # shutil.rmtree avoids building a shell command from a path.
        shutil.rmtree(folder_tiles, ignore_errors=True)

    # Softmax per tile, then drop the last class column before aggregating.
    # NOTE(review): this assumes the model's last output is 'Other' (last entry of
    # `labels`), so that class is never predicted here -- confirm against training.
    probs = scipy.special.softmax(preds, axis=1)[..., :-1]

    # image-level decision: class with the largest probability mass summed over tiles
    lb = np.argmax(np.sum(probs, axis=0))
    row['label'] = labels[lb]
    print(row)
    submission.append(row)

df_sub = pd.DataFrame(submission)

processing: /kaggle/input/UBC-OCEAN/test_images/41.png




{'image_id': 41, 'image_width': 28469, 'image_height': 16987, 'label': 'HGSC'}


## Finalize - export submission

In [14]:
display(df_sub.head())
# write only the two columns that appear in sample_submission.csv
df_sub[["image_id", "label"]].to_csv("submission.csv", index=False)

# print the first lines of the written file as a sanity check
! head submission.csv

Unnamed: 0,image_id,image_width,image_height,label
0,41,28469,16987,HGSC


image_id,label
41,HGSC
