In [28]:
import os
import re
import shutil

import numpy as np
from sklearn.model_selection import train_test_split


In [29]:

# Root folder of the dataset. Every input and output directory below is derived
# from it, so relocating the dataset only requires changing this one value.
dataset_root = "D:/monai/dataset"

# Input directories: source NIfTI images and their segmentation labels
nifti_img_dir = f"{dataset_root}/nifti_files/images"   # Directory for NIfTI image files
nifti_seg_dir = f"{dataset_root}/nifti_files/labels"   # Directory for NIfTI segmentation files

# Output directories for the image volumes
train_img_dir = f"{dataset_root}/TrainVolumes"         # Folder for training NIfTI image data
test_img_dir = f"{dataset_root}/TestVolumes"           # Folder for testing NIfTI image data

# Output directories for the segmentation volumes
train_seg_dir = f"{dataset_root}/TrainSegmentation"    # Folder for training NIfTI segmentation data
test_seg_dir = f"{dataset_root}/TestSegmentation"      # Folder for testing NIfTI segmentation data


In [30]:

# Collect the NIfTI volumes (.nii / .nii.gz) found in each source folder.
# Each listing is sorted by filename so that both lists follow the same
# ordering rule.
nifti_img_files = sorted(
    name for name in os.listdir(nifti_img_dir)
    if name.endswith(('.nii', '.nii.gz'))
)
nifti_seg_files = sorted(
    name for name in os.listdir(nifti_seg_dir)
    if name.endswith(('.nii', '.nii.gz'))
)

# Sanity check on the counts only; this does not prove that the i-th image and
# the i-th segmentation belong to the same case.
assert len(nifti_seg_files) == len(nifti_img_files), "Mismatch between the number of images and segmentations."


In [31]:
import os

# Check that every image has a label (and every label an image).
#
# Files are paired on a normalized basename: the trailing ".nii" / ".nii.gz"
# extension is removed (only at the end of the name), surrounding whitespace is
# stripped and the result is lowercased.
#
# WARNING: images without a label are permanently deleted from nifti_img_dir.
# Labels without an image are only reported.

def _group_by_basename(filenames):
    """Map each normalized basename to the list of filenames that produce it.

    Keeping the original filenames alongside the key lets the caller look up
    the exact file for a basename instead of searching by substring, which
    could select a different file (e.g. "case_1" is contained in "case_10").
    """
    groups = {}
    for filename in filenames:
        basename = re.sub(r'\.nii(?:\.gz)?$', '', filename).strip().lower()
        groups.setdefault(basename, []).append(filename)
    return groups


img_files_by_basename = _group_by_basename(nifti_img_files)
seg_files_by_basename = _group_by_basename(nifti_seg_files)

nifti_img_basenames = set(img_files_by_basename)
nifti_seg_basenames = set(seg_files_by_basename)

# Find missing labels for images
missing_labels = nifti_img_basenames - nifti_seg_basenames  # Images with no corresponding labels

# Find missing images for labels (just to double-check)
missing_images = nifti_seg_basenames - nifti_img_basenames  # Labels with no corresponding images

# Handle missing labels
if missing_labels:
    print(f"Found {len(missing_labels)} images with no corresponding labels:")
    # sorted() makes the report order deterministic (set order is arbitrary).
    for img_basename in sorted(missing_labels):
        for img_file in img_files_by_basename[img_basename]:
            print(f"Deleting {img_file} as it has no corresponding label.")
            os.remove(os.path.join(nifti_img_dir, img_file))
            # Keep the in-memory listing in sync with the folder so that later
            # cells (and a re-run of this one) never reference a deleted file.
            nifti_img_files.remove(img_file)
else:
    print("No missing labels found for any image.")

# Handle missing images
if missing_images:
    print(f"Found {len(missing_images)} labels with no corresponding images:")
    for seg_basename in sorted(missing_images):
        for seg_file in seg_files_by_basename[seg_basename]:
            print(f"Warning: {seg_file} has no corresponding image file.")
else:
    print("No missing images found for any label.")

print("Process complete.")


No missing labels found for any image.
No missing images found for any label.
Process complete.


In [32]:

# Delete image files that have no corresponding label.
#
# Basenames are normalized the same way as in the consistency check above
# (extension removed, whitespace stripped, lowercased), so a file reported there
# as matched is never deleted here because of a case or whitespace difference.
#
# WARNING: this permanently removes files from nifti_img_dir.

# Map each normalized basename to the exact image filename(s) it came from, so
# the file to delete is looked up by key rather than found by substring search.
img_files_by_basename = {}
for img_file in nifti_img_files:
    img_basename = re.sub(r'\.nii(?:\.gz)?$', '', img_file).strip().lower()
    img_files_by_basename.setdefault(img_basename, []).append(img_file)

nifti_img_basenames = set(img_files_by_basename)
nifti_seg_basenames = {
    re.sub(r'\.nii(?:\.gz)?$', '', seg_file).strip().lower()
    for seg_file in nifti_seg_files
}

# Find the image files without corresponding labels
missing_labels = nifti_img_basenames - nifti_seg_basenames  # Images with no corresponding labels

# Print and delete the unmatched image files
deleted_count = 0
for img_basename in sorted(missing_labels):
    for img_file in img_files_by_basename[img_basename]:
        print(f"Deleting {img_file} as it has no corresponding label.")
        os.remove(os.path.join(nifti_img_dir, img_file))
        # Drop the file from the in-memory listing too; otherwise later cells
        # would try to copy a file that no longer exists.
        nifti_img_files.remove(img_file)
        deleted_count += 1

# Report the number of files actually removed, not the number of basenames.
print(f"Deleted {deleted_count} image files with no corresponding labels.")


Deleted 0 image files with no corresponding labels.


In [33]:

# Pair every image with its segmentation explicitly by normalized basename
# (extension removed, whitespace stripped, lowercased) instead of trusting that
# two independently sorted listings line up position by position. The image
# order is kept, so when the listings were already aligned the split below is
# exactly the same as before.
seg_file_by_basename = {
    re.sub(r'\.nii(?:\.gz)?$', '', seg_name).strip().lower(): seg_name
    for seg_name in nifti_seg_files
}

paired_img_files = []
paired_seg_files = []
for img_name in nifti_img_files:
    pair_key = re.sub(r'\.nii(?:\.gz)?$', '', img_name).strip().lower()
    if pair_key not in seg_file_by_basename:
        # Fail loudly rather than silently training on a mismatched label.
        raise ValueError(f"No segmentation file found for image {img_name!r}.")
    paired_img_files.append(img_name)
    paired_seg_files.append(seg_file_by_basename[pair_key])

# Split the dataset into train and test sets (80% train, 20% test).
# random_state is fixed so the split is reproducible across runs.
train_img_files, test_img_files, train_seg_files, test_seg_files = train_test_split(
    paired_img_files, paired_seg_files, test_size=0.2, random_state=42)

# Copy each subset into its output folders. Files are copied, not moved: the
# source folders are left intact.
for subset_img_files, subset_seg_files, img_out_dir, seg_out_dir in (
    (train_img_files, train_seg_files, train_img_dir, train_seg_dir),
    (test_img_files, test_seg_files, test_img_dir, test_seg_dir),
):
    # Create directories if they don't exist
    os.makedirs(img_out_dir, exist_ok=True)
    os.makedirs(seg_out_dir, exist_ok=True)

    for img_file, seg_file in zip(subset_img_files, subset_seg_files):
        # Copy the original NIfTI image file
        shutil.copy(os.path.join(nifti_img_dir, img_file), os.path.join(img_out_dir, img_file))

        # Copy the corresponding segmentation file
        shutil.copy(os.path.join(nifti_seg_dir, seg_file), os.path.join(seg_out_dir, seg_file))

print(f"Moved {len(train_img_files)} image files and corresponding segmentation files to train folders.")
print(f"Moved {len(test_img_files)} image files and corresponding segmentation files to test folders.")

Moved 679 image files and corresponding segmentation files to train folders.
Moved 170 image files and corresponding segmentation files to test folders.
