# Splitting volumes into patches
1. Centroid approach: patches centered around lesions
2. Sliding window approach: keep every patch that contains at least a minimum number of lesion-labelled voxels
* create sliding-window patches
  * keep the ones with lesion voxels
* get xyz ranges of patches
* use ranges to crop other channels
* save patches
* save ranges in filenames or in pickles to crop more channels in the future

In [None]:
# IPython autoreload, mode 2: re-import modules before each cell executes, so
# edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
from pathlib import Path

import SimpleITK as sitk
import cc3d
import numpy as np
import pandas as pd
from tqdm import tqdm
from patching import get_lesion_patch_simple, get_sliding_patches, visualize_patch_seg, useful_patch_indices

# Root of the raw dataset; the image and label volumes are read from its
# "imagesTr" and "labelsTr" sub-folders respectively.
data_folder = Path("/media/liushifeng/KINGSTON/nnUNet_raw/Dataset001_3dlesion")
train_images = data_folder.joinpath("imagesTr")
train_labels = data_folder.joinpath("labelsTr")

### Create centroid patches

In [None]:
import torch
from process import preprocess, preprocess_seg, postprocess, postprocess_seg

# Output folders for the centroid-centered patches: CT patches and the
# matching segmentation patches are written under the same file name.
ct_cent_folder, seg_cent_folder = (
    Path("/media/liushifeng/KINGSTON/lesion3d/centroid/ct"),
    Path("/media/liushifeng/KINGSTON/lesion3d/centroid/seg"),
)

In [None]:
# Centroid approach: for every connected lesion component in a volume, cut one
# fixed-size patch centered on the component's centroid out of both the CT and
# the segmentation, and save the pair under the same file name.
#
# Side effects: writes "<root>_patch<c>.pt" files into ct_cent_folder and
# seg_cent_folder, and fills patch_counts with {volume root name: n_components}.

# Patch extent passed to get_lesion_patch_simple; constant for all volumes.
patch_dims = [64, 128, 128]

# torch.save does not create missing parent directories, so make sure the
# output folders exist before the (slow) preprocessing starts.
ct_cent_folder.mkdir(parents=True, exist_ok=True)
seg_cent_folder.mkdir(parents=True, exist_ok=True)

patch_counts = {}
for ct_filename in tqdm(os.listdir(train_images)):
    ct_path = train_images / ct_filename
    # Label files share the image name without the "_0000" channel suffix.
    seg_path = train_labels / ct_filename.replace("_0000.nii.gz", ".nii.gz")

    ct = preprocess(ct_path)
    seg = preprocess_seg(seg_path)

    # Each connected foreground region gets its own label in 1..n_components.
    labels, n_components = cc3d.connected_components(seg[0] > 0, return_N=True)

    root_name = ct_filename.replace("_0000.nii.gz", ".nii.gz").split(".")[0]
    patch_counts[root_name] = n_components
    for c in range(n_components):
        # np.argwhere returns an (n_voxels, ndim) ndarray of voxel indices.
        coords = np.argwhere(labels == (c + 1))
        # Average over voxels (axis 0) to get one index per spatial axis, then
        # truncate to integers. The previous `.float().mean(axis=1).to(int)`
        # used tensor-only methods on an ndarray and reduced the wrong axis.
        centroid = coords.mean(axis=0).astype(int)

        seg_patch = get_lesion_patch_simple(seg[0], centroid, patch_dims)
        ct_patch = get_lesion_patch_simple(ct[0], centroid, patch_dims)

        name = f"{root_name}_patch{c}.pt"
        # Patches are stored as float16 tensors.
        torch.save(torch.tensor(ct_patch).half(), ct_cent_folder / name)
        torch.save(torch.tensor(seg_patch).half(), seg_cent_folder / name)

In [None]:
## for visualizing patches
# Displays whatever `ct_patch` / `seg_patch` currently hold; after the centroid
# loop has run, that is the last patch pair it produced.
# NOTE(review): `n` is not used in this cell — presumably a leftover patch
# index; confirm before removing.
n = 374
visualize_patch_seg(ct_patch, seg_patch);

### Create sliding window patches
Save a patch only if it contains enough labelled voxels.

In [None]:
import torch
from process import preprocess, preprocess_seg, postprocess, postprocess_seg

# Output folders for the sliding-window patches: CT patches and the matching
# segmentation patches are written under the same file name.
ct_folder, seg_folder = (
    Path("/media/liushifeng/KINGSTON/lesion3d/sliding_window/ct"),
    Path("/media/liushifeng/KINGSTON/lesion3d/sliding_window/seg"),
)

In [None]:
# Sliding-window approach: tile every volume with overlapping windows, keep
# only the windows that useful_patch_indices selects from the segmentation
# (called with min_voxels=30), and save the matching CT/seg patch pairs.
#
# Side effects: writes "<root>_patch<i>.pt" files into ct_folder and
# seg_folder, and fills patch_counts with {volume root name: patches saved}.


def _select_patches(patches, wanted):
    """Return the entries of `patches` whose position is in `wanted`."""
    return [patch for idx, patch in enumerate(patches) if idx in wanted]


# Window geometry, identical for every volume.
patch_size = [64, 128, 128]
overlap_ratio = 0.5

patch_counts = {}
for ct_filename in tqdm(os.listdir(train_images)):
    # Label files share the image name without the "_0000" channel suffix.
    seg_filename = ct_filename.replace("_0000.nii.gz", ".nii.gz")
    root_name = seg_filename.split(".")[0]

    ct_path = train_images / ct_filename
    seg_path = train_labels / seg_filename

    ct = preprocess(ct_path)
    seg = preprocess_seg(seg_path)

    # The same patch size and overlap are used for both volumes; only the
    # ranges returned for the segmentation call are kept.
    all_ct_patches, _ = get_sliding_patches(ct[0], patch_size, overlap_ratio)
    all_seg_patches, zyx_ranges = get_sliding_patches(seg[0], patch_size, overlap_ratio)

    indices = useful_patch_indices(all_seg_patches, min_voxels=30)
    ct_patches = _select_patches(all_ct_patches, indices)
    seg_patches = _select_patches(all_seg_patches, indices)

    patch_counts[root_name] = 0
    for idx, ct_window in enumerate(ct_patches):
        name = f"{root_name}_patch{idx}.pt"
        # Patches are stored as float16 tensors.
        torch.save(torch.tensor(ct_window).half(), ct_folder / name)
        torch.save(torch.tensor(seg_patches[idx]).half(), seg_folder / name)
        patch_counts[root_name] += 1

In [None]:
## for visualizing patches
# Displays the first kept patch pair in `ct_patches` / `seg_patches`; after the
# sliding-window loop has run, these lists belong to the last processed volume.
# Indexing [0] raises IndexError if that volume produced no kept patches.
# NOTE(review): `n` is not used in this cell — presumably a leftover patch
# index; confirm before removing.
n = 374
visualize_patch_seg(ct_patches[0], seg_patches[0]);

In [None]:
# Histogram of how many patches each volume produced. `patch_counts` is the
# dict filled by whichever patch-creation loop ran last.
count_rows = list(patch_counts.items())
df = pd.DataFrame(count_rows)
# "n" = volume root name, "c" = number of patches for that volume.
df.columns = ["n", "c"]
df.hist(bins=100, figsize=(5, 3))