In [9]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import nibabel as nib
from scipy.ndimage import zoom
from glob import glob
from tqdm import tqdm

In [10]:
# Dataset layout: every location used by this notebook hangs off one root folder.
base_path = 'data'

# Inputs: the segmentation masks, and the two folders that may hold a study's slices.
segmentations_path = os.path.join(base_path, 'segmentations')
train_images_path2 = os.path.join(base_path, 'train_images')
train_images_path1 = os.path.join(os.path.join(base_path, 'sfd'), 'train_images')

# Output: processed volumes are written here; the folder is created on first run
# and left untouched when it already exists.
output_path = os.path.join(base_path, 'processed_3d')
os.makedirs(output_path, exist_ok=True)

In [11]:
def load_and_process_image_volume(study_path, target_size=(128, 128, 128)):
    """Build a fixed-size uint8 volume from a folder of 2-D slice images.

    Slice files are ordered by the integer that precedes the first '.' in
    their file name. ``target_size[2]`` of them are picked at evenly spaced
    positions (slices repeat when the folder holds fewer files than that),
    each picked slice is resized in-plane, and the stacked result is min-max
    scaled into the uint8 range.

    Args:
        study_path: Directory holding one image file per slice. Every file
            name must start with an integer, e.g. ``12.png``.
        target_size: ``(rows, cols, n_slices)`` of the returned volume.

    Returns:
        ``np.uint8`` array with the slice index on the last axis; for
        single-channel slice images its shape equals ``target_size``.

    Raises:
        ValueError: If ``study_path`` contains no files, or a file name does
            not start with an integer.
    """
    rows, cols, n_slices = target_size[0], target_size[1], target_size[2]

    # Numeric sort so that e.g. "10.png" comes after "2.png".
    slice_paths = sorted(glob(os.path.join(study_path, "*")),
                         key=lambda p: int(os.path.basename(p).split('.')[0]))
    if not slice_paths:
        raise ValueError(f"No slice files found in {study_path!r}")

    # Evenly spaced positions across the available slices, rounded to indices.
    picks = np.quantile(np.arange(len(slice_paths)),
                        np.linspace(0., 1., n_slices)).round().astype(int)

    slices = []
    for idx in picks:
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into an array.
        with Image.open(slice_paths[idx]) as img:
            # PIL's resize takes (width, height); the resulting array is
            # (height, width) == (rows, cols), matching the mask layout.
            slices.append(np.array(img.resize((cols, rows))))

    # Stack slices along a new last axis.
    volume = np.stack(slices, axis=-1)

    # Min-max scale to [0, 255]; the small epsilon keeps a constant volume
    # from dividing by zero.
    volume = volume - np.min(volume)
    volume = volume / (np.max(volume) + 1e-4)
    volume = (volume * 255).astype(np.uint8)

    return volume

def load_and_process_mask(mask_path, target_size=(128, 128, 128), num_classes=7):
    """Load a label volume and return it as a per-class binary uint8 mask.

    The file is read with nibabel, re-oriented, resampled to ``target_size``
    with nearest-neighbour interpolation (so label values are never blended),
    and expanded into one channel per class: channel ``c`` is 255 where the
    label equals ``c + 1`` and 0 elsewhere. Label 0 gets no channel.

    Args:
        mask_path: Path to the segmentation file readable by ``nib.load``.
        target_size: Spatial shape of the output, as three integers.
        num_classes: Number of channels; labels ``1..num_classes`` are kept.

    Returns:
        ``np.uint8`` array of shape ``(num_classes, *target_size)`` holding
        only the values 0 and 255.

    Raises:
        ValueError: If the loaded volume is not 3-dimensional.
    """
    # Accept lists as well as tuples; the shape comparison below needs a tuple.
    target_size = tuple(target_size)

    labels = nib.load(mask_path).get_fdata()
    if labels.ndim != 3:
        raise ValueError(
            f"Expected a 3-D segmentation volume, got shape {labels.shape} from {mask_path!r}"
        )

    # Swap the first two axes, then reverse the first and last axes.
    # NOTE(review): presumably this aligns the mask with the slice-image
    # volume's orientation - confirm against a visual overlay.
    labels = labels.transpose(1, 0, 2)[::-1, :, ::-1]

    # Resample only when needed; order=0 is nearest-neighbour.
    if labels.shape != target_size:
        factors = [t / s for t, s in zip(target_size, labels.shape)]
        labels = zoom(labels, factors, order=0)

    # Compare against all class ids at once and go straight to uint8, instead
    # of staging the result in a float64 buffer eight times the final size.
    class_ids = np.arange(1, num_classes + 1).reshape(-1, 1, 1, 1)
    return (labels[np.newaxis] == class_ids).astype(np.uint8) * 255

In [20]:
# Study IDs that have a segmentation, each kept as the '.'-separated parts of
# the file name without its extension. os.listdir returns entries in arbitrary
# order, so the listing is sorted to make the processing order reproducible.
segmentation_ids = [
    f.split('.')[:-1]
    for f in sorted(os.listdir(segmentations_path))
    if f.endswith('.nii')
]

# Folders that may contain a study's slice images, in lookup priority order.
image_roots = (train_images_path1, train_images_path2)

for study_id_parts in tqdm(segmentation_ids, desc="Processing studies"):
    study_id = '.'.join(study_id_parts)

    # Take the first location that actually is a directory; a stray file with
    # the same name must not be mistaken for a study folder.
    study_path = next(
        (candidate
         for candidate in (os.path.join(root, study_id) for root in image_roots)
         if os.path.isdir(candidate)),
        None,
    )
    if study_path is None:
        print(f"Warning: No image folder found for study {study_id}")
        continue

    # Build the image volume and the per-class mask for this study.
    image_volume = load_and_process_image_volume(study_path)
    mask_path = os.path.join(segmentations_path, f"{study_id}.nii")
    mask_volume = load_and_process_mask(mask_path)

    # Persist both arrays next to each other, keyed by study ID.
    np.save(os.path.join(output_path, f"{study_id}_image.npy"), image_volume)
    np.save(os.path.join(output_path, f"{study_id}_mask.npy"), mask_volume)

print("Processing complete!")

Processing studies: 100%|██████████| 87/87 [05:15<00:00,  3.62s/it]

Processing complete!





In [13]:
class TimmSegModel(nn.Module):
    """Segmentation model wrapper whose encoder is created through ``timm``.

    NOTE(review): only the encoder construction is visible in this cell; the
    rest of the class (if any) is not shown here.
    NOTE(review): neither ``nn`` nor ``timm`` is imported in the visible import
    cell, and the recorded run of this cell failed with
    ``NameError: name 'nn' is not defined`` - the imports must be added before
    this cell can execute.
    """

    def __init__(self, backbone, segtype='unet', pretrained=False):
        """Create the encoder.

        Args:
            backbone: Passed as the first argument to ``timm.create_model``.
            segtype: Accepted but not used by the code visible here.
            pretrained: Forwarded to ``timm.create_model`` as ``pretrained``.
        """
        super(TimmSegModel, self).__init__()

        # NOTE(review): drop_rate and drop_path_rate are neither parameters nor
        # locals; they are read from an enclosing scope and are not defined in
        # the visible source - confirm they exist before instantiating.
        self.encoder = timm.create_model(
            backbone,
            in_chans=3,
            features_only=True,
            drop_rate=drop_rate,
            drop_path_rate=drop_path_rate,
            pretrained=pretrained
        )

NameError: name 'nn' is not defined