In [17]:
import pandas as pd
import json
import os 

basedir = "/root/datasets/Totalsegmentator/"  # Replace with your actual base directory

exclude_ids = {"s0830", "s0045"}  # Replace with actual IDs
# Load CSV file
csv_file = os.path.join(basedir, "meta.csv")  # Replace with actual file name
df = pd.read_csv(csv_file, delimiter=';')

df = df[~df['image_id'].isin(exclude_ids)]
# Add full file paths to the dataframe
df['image'] = df['image_id'].apply(lambda x: os.path.join(x, 'ct.nii.gz'))
df['label'] = df['image_id'].map(lambda x: os.path.join(x, 'label/label_map.nii.gz'))
df = df[["image", "split", "label"]]
# Group data by 'split' column
split_dict = {split: df[df['split'] == split].to_dict(orient='records') for split in df['split'].unique()}

# Save to JSON
json_file = os.path.join(basedir, "split_data.json")
with open(json_file, "w") as f:
    json.dump(split_dict, f, indent=4)

print(f"JSON file saved as {json_file}")

JSON file saved as /root/datasets/Totalsegmentator/split_data.json


In [4]:
import os
import nibabel as nib
import numpy as np
import multiprocessing

def get_segmentation_filenames(seg_dirs):
    """
    Gets the sorted list of unique segmentation filenames across all subjects.
    Ensures that all subjects have the same segmentation order.
    """
    all_files = set()
    for seg_dir in seg_dirs:
        if os.path.exists(seg_dir):
            all_files.update([f for f in os.listdir(seg_dir) if f.endswith(".nii.gz")])
    return sorted(all_files)  # Ensures the same order for all subjects

def create_label_map(seg_dir, output_path, expected_files):
    """
    Creates a single label map from multiple segmentation masks.
    
    - Each segmentation file corresponds to a unique integer label (1, 2, ..., num_classes).
    - The final output is a 3D NIfTI file with labeled regions.

    Parameters:
    - seg_dir (str): Path to the directory containing segmentation .nii.gz files.
    - output_path (str): Path to save the final label map.
    - expected_files (list): List of expected segmentation filenames in order.

    Returns:
    - output_path (str): Path to the saved label map NIfTI file.
    """
    reference_nii = None
    label_map = None  # Initialize label map

    for idx, seg_file in enumerate(expected_files, start=1):  # Start labels from 1
        file_path = os.path.join(seg_dir, seg_file)

        if os.path.exists(file_path):
            nii = nib.load(file_path)
            seg_data = nii.get_fdata()

            if reference_nii is None:
                reference_nii = nii  # Store first file's metadata
                label_map = np.zeros(seg_data.shape, dtype=np.uint8)  # Initialize label map

            label_map[seg_data > 0] = idx  # Assign class index where segmentation exists
        else:
            print(f"Warning: {seg_file} not found in {seg_dir}, skipping.")

    if reference_nii is None:
        print(f"Error: No segmentations found in {seg_dir}. Skipping.")
        return None

    # Ensure output directory exists
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Save label map as NIfTI
    nib.save(nib.Nifti1Image(label_map, reference_nii.affine, reference_nii.header, dtype=np.uint8), output_path)

    print(f"Saved label map: {output_path}")
    return output_path

def process_subject(subject, base_dir, expected_files):
    """
    Processes a single subject: converts segmentations to a label map and saves to 'label' folder.
    """
    subject_path = os.path.join(base_dir, subject)
    seg_dir = os.path.join(subject_path, "segmentations")  # Path to segmentation folder
    label_dir = os.path.join(subject_path, "label")  # Output label folder
    output_file = os.path.join(label_dir, "label_map.nii.gz")  # Output file

    if os.path.exists(seg_dir):  # Check if segmentations folder exists
        create_label_map(seg_dir, output_file, expected_files)
    else:
        print(f"Segmentations folder not found for {subject}, skipping.")

def process_all_subjects(base_dir, num_workers=None):
    """
    Iterates over all subject folders in base_dir using multiprocessing to speed up processing.

    Parameters:
    - base_dir (str): Path to the main directory containing all subject folders.
    - num_workers (int, optional): Number of CPU cores to use. Default is all available cores.
    """
    # Get all subject directories
    subjects = [d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))]
    seg_dirs = [os.path.join(base_dir, subject, "segmentations") for subject in subjects if os.path.exists(os.path.join(base_dir, subject, "segmentations"))]

    # Get the consistent order of segmentation files across all subjects
    expected_files = get_segmentation_filenames(seg_dirs)
    print(expected_files, len(expected_files))

    # # Use multiprocessing to process subjects in parallel
    with multiprocessing.Pool(processes=num_workers) as pool:
        pool.starmap(process_subject, [(subject, base_dir, expected_files) for subject in subjects])


In [5]:

basedir = "/root/datasets/Totalsegmentator/"  # Replace with your actual base directory
process_all_subjects(basedir, num_workers=12)  # Use 8 CPU cores (or omit for max available)


['adrenal_gland_left.nii.gz', 'adrenal_gland_right.nii.gz', 'aorta.nii.gz', 'atrial_appendage_left.nii.gz', 'autochthon_left.nii.gz', 'autochthon_right.nii.gz', 'brachiocephalic_trunk.nii.gz', 'brachiocephalic_vein_left.nii.gz', 'brachiocephalic_vein_right.nii.gz', 'brain.nii.gz', 'clavicula_left.nii.gz', 'clavicula_right.nii.gz', 'colon.nii.gz', 'common_carotid_artery_left.nii.gz', 'common_carotid_artery_right.nii.gz', 'costal_cartilages.nii.gz', 'duodenum.nii.gz', 'esophagus.nii.gz', 'femur_left.nii.gz', 'femur_right.nii.gz', 'gallbladder.nii.gz', 'gluteus_maximus_left.nii.gz', 'gluteus_maximus_right.nii.gz', 'gluteus_medius_left.nii.gz', 'gluteus_medius_right.nii.gz', 'gluteus_minimus_left.nii.gz', 'gluteus_minimus_right.nii.gz', 'heart.nii.gz', 'hip_left.nii.gz', 'hip_right.nii.gz', 'humerus_left.nii.gz', 'humerus_right.nii.gz', 'iliac_artery_left.nii.gz', 'iliac_artery_right.nii.gz', 'iliac_vena_left.nii.gz', 'iliac_vena_right.nii.gz', 'iliopsoas_left.nii.gz', 'iliopsoas_right.nii

In [3]:
import nibabel as nib
import numpy as np

# Define file path
nii_file = "/root/datasets/Totalsegmentator/s0104/label/label_map.nii.gz"

# Load the NIfTI file
nii = nib.load(nii_file)


# Get the image data as a NumPy array
data = nii.get_fdata()

# Check datatype, min, and max values
print(f"Data Type: {data.dtype}")
print(f"Data:", nii.header.get_data_dtype())
print(f"Min Value: {np.min(data)}")
print(f"Max Value: {np.max(data)}")

Data Type: float64
Data: uint8
Min Value: 0.0
Max Value: 105.0


In [6]:
import os
import nibabel as nib
import gzip

def check_nifti_files(root_dir):
    """
    Recursively iterates over all `.nii.gz` files in `root_dir`
    and checks if they are corrupted.
    """
    corrupted_files = []

    for dirpath, _, filenames in os.walk(root_dir):
        for file in filenames:
            if file.endswith(".nii.gz"):
                file_path = os.path.join(dirpath, file)
                try:
                    # Step 1: Try to open as gzip file to check for corruption
                    with gzip.open(file_path, 'rb') as f:
                        f.read(100)  # Read first 100 bytes to check integrity

                    # Step 2: Try loading with NiBabel
                    nii = nib.load(file_path)
                    nii.get_fdata()  # Ensure data is readable
                    
                except (gzip.BadGzipFile, OSError) as e:
                    print(f"❌ Corrupted GZip File: {file_path} - {e}")
                    corrupted_files.append(file_path)

                except Exception as e:
                    print(f"❌ Other NIfTI Error: {file_path} - {e}")
                    corrupted_files.append(file_path)

    if corrupted_files:
        print("\n⚠️ Found Corrupted Files:")
        for corrupt in corrupted_files:
            print(f"  - {corrupt}")
    else:
        print("\n✅ All NIfTI files are valid!")

# Run check on dataset directory
root_directory = "/root/datasets/Totalsegmentator"  # Change this to your dataset path
check_nifti_files(root_directory)



❌ Other NIfTI Error: /root/datasets/Totalsegmentator/s0000/label/concatenated_labels.nii.gz - Compressed file ended before the end-of-stream marker was reached
❌ Corrupted GZip File: /root/datasets/Totalsegmentator/s0011/label/concatenated_labels.nii.gz - Expected 4877349867 bytes, got 458554261 bytes from 
 - could the file be damaged?
❌ Other NIfTI Error: /root/datasets/Totalsegmentator/s0037/label/concatenated_labels.nii.gz - Compressed file ended before the end-of-stream marker was reached
❌ Corrupted GZip File: /root/datasets/Totalsegmentator/s0045/ct.nii.gz - CRC check failed 0xe7b9154e != 0xaf073076
❌ Other NIfTI Error: /root/datasets/Totalsegmentator/s0052/label/concatenated_labels.nii.gz - Compressed file ended before the end-of-stream marker was reached


KeyboardInterrupt: 

In [16]:
import os
import nibabel as nib
import gzip

# Suspected corrupted files
suspect_files = [
    "/root/datasets/Totalsegmentator/s0234/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0234/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0771/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0771/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0398/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0398/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0249/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0249/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0095/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0095/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0021/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0021/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0625/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0625/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0216/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0216/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0045/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0045/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0000/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0000/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0698/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0698/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0123/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0123/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0324/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0324/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0747/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0747/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0032/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0032/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0621/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0621/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0614/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0614/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0420/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0420/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0572/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0572/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0658/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0658/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0546/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0546/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0466/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0466/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0520/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0520/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0607/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0607/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0650/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0650/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0675/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0675/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s1379/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s1379/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0873/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0873/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0655/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0655/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0352/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0352/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0544/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0544/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0346/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0346/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0618/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0618/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0506/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0506/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0158/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0158/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0853/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0853/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0445/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0445/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0458/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0458/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0339/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0339/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0462/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0462/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0208/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0208/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s0170/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s0170/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s1287/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s1287/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s1233/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s1233/label/label_map.nii.gz",
    "/root/datasets/Totalsegmentator/s1278/ct.nii.gz",
    "/root/datasets/Totalsegmentator/s1278/label/label_map.nii.gz"
]

def check_nifti_file(file_path):
    """Checks if a NIfTI file is corrupted."""
    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_path}")
        return False

    try:
        # Step 1: Check if it's a valid gzip file
        with gzip.open(file_path, 'rb') as f:
            f.read(100)  # Try reading first 100 bytes

        # Step 2: Try loading with NiBabel
        nii = nib.load(file_path)
        nii.get_fdata()  # Ensure data is readable

        print(f"✅ File is valid: {file_path}")
        return True

    except (gzip.BadGzipFile, OSError) as e:
        print(f"❌ Corrupted GZip File: {file_path} - {e}")
    except Exception as e:
        print(f"❌ Other NIfTI Error: {file_path} - {e}")

    return False

# Run check for each suspected file
for file in suspect_files:
    check_nifti_file(file)


✅ File is valid: /root/datasets/Totalsegmentator/s0234/ct.nii.gz
✅ File is valid: /root/datasets/Totalsegmentator/s0234/label/label_map.nii.gz
✅ File is valid: /root/datasets/Totalsegmentator/s0771/ct.nii.gz
✅ File is valid: /root/datasets/Totalsegmentator/s0771/label/label_map.nii.gz
✅ File is valid: /root/datasets/Totalsegmentator/s0398/ct.nii.gz
✅ File is valid: /root/datasets/Totalsegmentator/s0398/label/label_map.nii.gz
✅ File is valid: /root/datasets/Totalsegmentator/s0249/ct.nii.gz
✅ File is valid: /root/datasets/Totalsegmentator/s0249/label/label_map.nii.gz
✅ File is valid: /root/datasets/Totalsegmentator/s0095/ct.nii.gz
✅ File is valid: /root/datasets/Totalsegmentator/s0095/label/label_map.nii.gz
✅ File is valid: /root/datasets/Totalsegmentator/s0021/ct.nii.gz
✅ File is valid: /root/datasets/Totalsegmentator/s0021/label/label_map.nii.gz
✅ File is valid: /root/datasets/Totalsegmentator/s0625/ct.nii.gz
✅ File is valid: /root/datasets/Totalsegmentator/s0625/label/label_map.nii.gz

In [20]:
import torch

checkpoint_path = "./logs/2025-02-27T18-28-28_swlin_unetr_totseg/checkpoints/last.ckpt"

# Load only metadata
checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=False)['state_dict']

# Print top-level keys
print("Checkpoint Keys:", list(checkpoint.keys()))


  from .autonotebook import tqdm as notebook_tqdm


Checkpoint Keys: ['epoch', 'global_step', 'pytorch-lightning_version', 'state_dict', 'loops', 'callbacks', 'optimizer_states', 'lr_schedulers']
