In [1]:
import torch
import pyvips
import pandas as pd
import albumentationsxl as A

from pathlib import Path
from torch.utils.data import Dataset, DataLoader
from lightstream.models.resnet.resnet import StreamingResNet
from sklearn.model_selection import train_test_split
from lightning.pytorch import Trainer

In [2]:
ROOT_DIR = Path("/data/pathology/archives/breast/camelyon/CAMELYON16")
label_df = pd.read_csv(str(ROOT_DIR / Path("evaluation/reference.csv")))
image_dir = ROOT_DIR / Path("images")

label_df["label"] = label_df["class"].apply(lambda x: 0 if x =="negative" else 1)

In [5]:
test_df = label_df[label_df["image"].str.startswith("test")]
train_df = label_df[label_df["image"].str.startswith("normal") | label_df["image"].str.startswith("tumor")]
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42,stratify=train_df["label"])

In [4]:
# Normalizing with imagenet statistics is done during streaming in scnn.py, so we don't do that here
image_size=4096
train_transforms  = A.Compose([A.CropOrPad(image_size, image_size), A.Flip(p=0.5), A.ToDtype("float", scale=True), A.ToTensor()])
test_transforms = A.Compose([A.CropOrPad(image_size, image_size), A.ToDtype("float", scale=True), A.ToTensor()])

In [6]:
class CamelyonDataset(Dataset):
    def __init__(self, image_dir: list, df: pd.DataFrame, transform: A.Compose| None=None):
        self.image_dir = image_dir
        self.df = df
        self.transforms = transform
        self.df["image_path"] = self.df["image"].apply(lambda x: image_dir / Path(x).with_suffix(".tif"))
        
        self.images = self.df["image_path"].tolist()
        self.labels = self.df["label"].tolist()
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, item):
        try:
            image = pyvips.Image.new_from_file(self.images[item], level=5)[0:3]
        except Exception as e:
            image = pyvips.Image.new_from_file(self.images[item], page=5)[0:3]
            
        label = self.labels[item]
        if self.transforms is not None:
            image = self.transforms(image=image)["image"]
        return image, label
        

In [7]:
train_loader = DataLoader(dataset=CamelyonDataset(image_dir=image_dir, df=train_df, transform=train_transforms), num_workers=1)
valid_loader = DataLoader(dataset=CamelyonDataset(image_dir=image_dir, df=val_df, transform=test_transforms), num_workers=1)
test_loader = DataLoader(dataset=CamelyonDataset(image_dir=image_dir, df=test_df, transform=test_transforms), num_workers=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.df["image_path"] = self.df["image"].apply(lambda x: image_dir / Path(x).with_suffix(".tif"))


In [8]:
model = StreamingResNet(model_name="resnet18", tile_size=2880, num_classes=2, train_streaming_layers=False, loss_fn=torch.nn.CrossEntropyLoss())

metrics None
No tile cache found, calculating it now

Converting modules to nn.Identity()
Executing pre-streaming initialization callbacks (if any):

Initializing streaming model
Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) 
 Lost(top:2.0, left:2.0, bottom:1.0, right:1.0)
MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) 
 Lost(top:2.0, left:2.0, bottom:1.0, right:1.0)
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) 
 Lost(top:3.0, left:3.0, bottom:2.0, right:2.0)
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) 
 Lost(top:4.0, left:4.0, bottom:3.0, right:3.0)
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) 
 Lost(top:5.0, left:5.0, bottom:4.0, right:4.0)
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) 
 Lost(top:6.0, left:6.0, bottom:5.0, right:5.0)
Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 

In [9]:
trainer = Trainer(
        default_root_dir="./",
        accelerator="gpu",
        max_epochs=15,
        devices=1,
        precision="16-mixed",
        strategy="auto",
    )

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [10]:
trainer.fit(model=model, train_dataloaders=train_loader, val_dataloaders=valid_loader)

Missing logger folder: ./lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | stream_network | StreamingCNN     | 11.2 M
1 | head           | Sequential       | 1.0 K 
2 | loss_fn        | CrossEntropyLoss | 0     
----------------------------------------------------
1.0 K     Trainable params
11.2 M    Non-trainable params
11.2 M    Total params
44.710    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=5` in the `DataLoader` to improve performance.
/usr/local/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=5` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=15` reached.
