### Slideflow

#### Creating and loading a project

In [2]:
import slideflow as sf
import os

os.environ["SF_SLIDE_BACKEND"] = "libvips"
os.environ["SF_BACKEND"] = "torch"

# Create project
project = sf.create_project(
root = 'slideflow',              # name
annotations = "annotations.csv", # csv with binary labels
slides = '0_V'                   # folder containing ndpi files
)

# Prepare a project and dataset
project = sf.load_project('slideflow')
dataset = project.dataset(tile_px=224, tile_um='45x', sources=['MyProject'])
dataset.summary() # 2450309 TFRecords

from slideflow.model import build_feature_extractor

dataset.extract_tiles(qc='otsu')

resnet50 = build_feature_extractor(
    'resnet50_imagenet',
    tile_px=224
)

# Calculate features for this dataset.
features = sf.DatasetFeatures(resnet50, dataset)
features.to_torch('bags_folder')

#### Dataset

In [4]:
# Prepare a project and dataset
project = sf.load_project('slideflow')
dataset = project.dataset(tile_px=224, tile_um='45x', sources=['MyProject'])
dataset.summary()

Output()

Overview:
╒═════════════════════╤═════════════════════════╕
│ Configuration file: │ slideflow/datasets.json │
│ Tile size (px):     │ 224                     │
│ Tile size (um):     │ 45x                     │
│ Slides:             │ 69                      │
│ Patients:           │ 69                      │
│ Slides with ROIs:   │ 0                       │
│ Patients with ROIs: │ 0                       │
╘═════════════════════╧═════════════════════════╛

Filters:
╒═══════════════╤════╕
│ Filters:      │ {} │
├───────────────┼────┤
│ Filter Blank: │ [] │
├───────────────┼────┤
│ Min Tiles:    │ 0  │
╘═══════════════╧════╛

Sources:

MyProject
╒═══════════╤═════════════════════╕
│ slides    │ 0_V                 │
│ roi       │ 0_V/rois            │
│ tiles     │ slideflow/tiles     │
│ tfrecords │ slideflow/tfrecords │
│ label     │ 224px_45x           │
╘═══════════╧═════════════════════╛

Number of tiles in TFRecords: 2450309
Annotation columns:
Index(['patient', 'category', 'slide'

In [36]:
dataset.extract_tiles(qc='otsu')

Output()

Output()

findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.
findfont: Font family 'Arial' not found.


Updating CSV for 57 reports.


Output()

Output()

{'0_V/20B0715-V.ndpi': <slideflow.slide.report.SlideReport at 0x7f3a7e7b80d0>,
 '0_V/17B2392-V.ndpi': <slideflow.slide.report.SlideReport at 0x7f3a7dec0ee0>,
 '0_V/18B3529-V.ndpi': <slideflow.slide.report.SlideReport at 0x7f3a7dea67a0>,
 '0_V/18B1466-V.ndpi': <slideflow.slide.report.SlideReport at 0x7f3a7e797f70>,
 '0_V/18B0543-V.ndpi': <slideflow.slide.report.SlideReport at 0x7f3a7e159060>,
 '0_V/19B3906-V.ndpi': <slideflow.slide.report.SlideReport at 0x7f3a80c6e5c0>,
 '0_V/17B0492-V.ndpi': <slideflow.slide.report.SlideReport at 0x7f3a7dea48b0>,
 '0_V/20B0597-V.ndpi': <slideflow.slide.report.SlideReport at 0x7f3a437866e0>,
 '0_V/18B0440-V.ndpi': <slideflow.slide.report.SlideReport at 0x7f3a42e77010>,
 '0_V/20B1369-V.ndpi': <slideflow.slide.report.SlideReport at 0x7f3a42e76c80>,
 '0_V/19B3612-V.ndpi': <slideflow.slide.report.SlideReport at 0x7f3a42e759f0>,
 '0_V/16B1229-V.ndpi': <slideflow.slide.report.SlideReport at 0x7f3a42e745b0>,
 '0_V/20B1068-V.ndpi': <slideflow.slide.report.Slide

<Figure size 640x480 with 0 Axes>

#### Extract features

In [30]:
from slideflow.model import build_feature_extractor

resnet50 = build_feature_extractor(
    'resnet50_imagenet',
    tile_px=224
)

# Calculate features for this dataset.
features = sf.DatasetFeatures(resnet50, dataset)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /home/user/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:07<00:00, 13.1MB/s]
                                                            

Output()

Output()

Output()

TypeError: DatasetFeatures.__init__() missing 1 required positional argument: 'dataset'

In [31]:
features.to_torch('bags_folder')

Output()

#### Model configuration

In [32]:
from slideflow.mil import mil_config

config = mil_config('clam_sb', lr=1e-3, trainer='clam')

#### Model training

Esto es lo que no funciona. Da problemas en la parte de splits:
AnnotationsError: Missing column site.

In [None]:
# Prepare a project and dataset
full_dataset = dataset = project.dataset(tile_px=224, tile_um='45x')

# Split the dataset using three-fold, site-preserved cross-validation
splits = full_dataset.kfold_split(
    k=3,
    labels='category',
    preserved_site=True
)

# Train on each cross-fold
for train, val in splits:
    project.train_mil(
        config=config,
        outcomes='category',
        train_dataset=train_dataset,
        val_dataset=val,
        bags='bags_folder',
        attention_heatmaps=True,
        cmap='magma',
        interpolation=None
    )

Asi que he intentado hacer esto. Simplemente dividir el dataset manualmente pero da fallos en el entrenamiento.

In [38]:
train_dataset = project.dataset(tile_px=299, tile_um=302, filters={'dataset': 'train'})
eval_dataset = project.dataset(tile_px=299, tile_um=302, filters={'dataset': 'eval'})

In [39]:
project.train_mil(
    config=config,
    outcomes='category',
    train_dataset=train_dataset,
    val_dataset=eval_dataset,
    bags='bags_folder',
    attention_heatmaps=True,
    cmap='magma',
    interpolation=None
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['label'] = data[label_col].copy()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['label'] = data[label_col].copy()


Output()

KeyError: Caught KeyError in DataLoader worker process 2.
Original Traceback (most recent call last):
  File "/home/user/stage-lucas/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/user/stage-lucas/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/user/stage-lucas/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/user/stage-lucas/lib/python3.10/site-packages/slideflow/mil/clam/datasets/__init__.py", line 31, in __getitem__
    features = torch.load(self.pt_files[slide_id])
KeyError: '21B2743-V'


#### Model evaluation