# Cell 1: Cloning and Installing the kits23 Package

In [None]:
!git clone https://github.com/neheller/kits23
!cd kits23 && pip install -e .


Cloning into 'kits23'...
remote: Enumerating objects: 15609, done.[K
remote: Counting objects: 100% (83/83), done.[K
remote: Compressing objects: 100% (50/50), done.[K
remote: Total 15609 (delta 39), reused 61 (delta 30), pack-reused 15526 (from 1)[K
Receiving objects: 100% (15609/15609), 493.94 MiB | 24.38 MiB/s, done.
Resolving deltas: 100% (14008/14008), done.
Updating files: 100% (5720/5720), done.
Obtaining file:///content/kits23
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting surface-distance@ git+https://github.com/deepmind/surface-distance.git (from kits23==0.1.4)
  Cloning https://github.com/deepmind/surface-distance.git to /tmp/pip-install-hip_iu7g/surface-distance_bc918e2e32dc4e23a0feb7d9738fde45
  Running command git clone --filter=blob:none --quiet https://github.com/deepmind/surface-distance.git /tmp/pip-install-hip_iu7g/surface-distance_bc918e2e32dc4e23a0feb7d9738fde45
  Resolved https://github.com/deepmind/surface-distance.git to commit 1f805cea44680

# Cell 2: Downloading the Dataset

In [None]:
# Run the kits23_download_data command-line tool to fetch the case data.
# In the recorded run it reported 489 cases and took roughly ten minutes.
# NOTE(review): presumably this command is provided by the kits23 package
# installed in the previous cell - confirm.
!kits23_download_data



Found 489 cases to download

Dowloading case_00588...: 100% 489/489 [10:35<00:00,  1.30s/it]


# Cell 3: Inspecting the Download



In [None]:
import os

# Read the working directory once and reuse the listing below.
root_entries = os.listdir()
print("Root:", root_entries)

# Show the contents of the cloned repository, if it is present.
if "kits23" in root_entries:
    print("kits23/:", os.listdir("kits23"))

# Summarise the dataset folder instead of dumping every case name: a count plus
# a short sorted sample is enough to confirm that the download worked.
dataset_dir = os.path.join("kits23", "dataset")
if os.path.isdir(dataset_dir):
    dataset_entries = sorted(os.listdir(dataset_dir))
    print(f"kits23/dataset/: {len(dataset_entries)} entries, first 5: {dataset_entries[:5]}")
else:
    print("⚠️ 'kits23/dataset/' not found.")


Root: ['.config', 'kits23', 'sample_data']
kits23/: ['changelog.md', 'setup.py', 'kits23.egg-info', 'tests', '.gitignore', 'dataset', 'README.md', '.git', '.pylintrc', 'kits23', 'LICENSE', 'pull_request_template.md']
kits23/dataset/: ['case_00231', 'case_00414', 'case_00188', 'case_00220', 'case_00450', 'case_00536', 'case_00183', 'case_00459', 'case_00510', 'case_00468', 'case_00427', 'case_00024', 'case_00209', 'case_00079', 'case_00108', 'case_00235', 'case_00281', 'case_00537', 'case_00509', 'case_00565', 'case_00402', 'case_00009', 'case_00241', 'case_00517', 'case_00452', 'case_00438', 'case_00174', 'case_00269', 'case_00424', 'case_00018', 'case_00481', 'case_00027', 'case_00256', 'case_00226', 'case_00588', 'case_00017', 'case_00405', 'case_00037', 'case_00208', 'case_00245', 'case_00493', 'case_00515', 'case_00524', 'case_00011', 'case_00227', 'case_00292', 'case_00071', 'case_00409', 'case_00411', 'case_00469', 'case_00574', 'case_00423', 'case_00073', 'case_00440', 'case_001

## Cell 4: Saving Image Slices from the 3D Data

Only slices that contain normal kidney tissue (label 1) and no tumor or cyst are considered normal. Slices that contain a tumor (label 2) or a cyst (label 3) are considered abnormal. Slices that contain only background (label 0) are discarded and do not enter the dataset.

In [None]:
def save_slices(case_folder, output_dir, axis=2):
    """Export the labelled 2D slices of one case as 8-bit PNG files.

    Reads ``imaging.nii.gz`` and ``segmentation.nii.gz`` from ``case_folder``,
    walks both volumes along ``axis`` and writes every labelled slice to
    ``<output_dir>/normal`` or ``<output_dir>/abnormal``:

    * ``abnormal`` - the segmentation slice contains label 2 or label 3;
    * ``normal``   - it contains label 1 and neither label 2 nor label 3;
    * any other slice (none of the labels 1, 2, 3) is skipped.

    Each saved slice is min-max rescaled on its own to the 0-255 range, so
    grey values are not comparable between slices.

    Args:
        case_folder: Path of the case directory holding the two NIfTI files.
            A trailing path separator is tolerated.
        output_dir: Directory that receives the ``normal`` and ``abnormal``
            sub-folders (created if missing).
        axis: Index of the array axis to slice along. Defaults to 2, the axis
            this function has always used.

    Raises:
        ValueError: If the image and segmentation volumes differ in shape.
        Exception: Whatever ``nib.load`` raises for a missing or unreadable
            file is propagated to the caller.

    Errors while encoding or writing an individual slice are printed and that
    slice is skipped; they do not abort the case.
    """
    import os
    import io
    import numpy as np
    import nibabel as nib
    from PIL import Image

    # Paths to image and segmentation files
    imaging_path = os.path.join(case_folder, "imaging.nii.gz")
    seg_path = os.path.join(case_folder, "segmentation.nii.gz")

    # Load the 3D scan and segmentation volumes
    img = nib.load(imaging_path).get_fdata()
    seg = nib.load(seg_path).get_fdata()

    # Slices are paired by index, so the two volumes must line up exactly.
    if img.shape != seg.shape:
        raise ValueError(
            f"Shape mismatch in {case_folder}: "
            f"imaging {img.shape} vs segmentation {seg.shape}"
        )

    # Ensure output folders exist
    os.makedirs(os.path.join(output_dir, "normal"), exist_ok=True)
    os.makedirs(os.path.join(output_dir, "abnormal"), exist_ok=True)

    # normpath drops a trailing separator; without it basename() would be empty
    # and files from different cases would overwrite each other.
    case_name = os.path.basename(os.path.normpath(case_folder))

    # NOTE(review): the original code described axis 2 as the axial axis. The
    # orientation of the volumes is not visible here - confirm which array axis
    # is axial for this dataset and pass it via ``axis`` if it is not 2.
    # moveaxis returns views, so nothing is copied.
    img_slices = np.moveaxis(img, axis, -1)
    seg_slices = np.moveaxis(seg, axis, -1)

    for i in range(img_slices.shape[-1]):
        slice_img = img_slices[..., i]
        slice_seg = seg_slices[..., i]

        # Build the lesion test once; tumor/cyst takes priority over kidney.
        has_lesion = np.any((slice_seg == 2) | (slice_seg == 3))
        if has_lesion:
            label = "abnormal"
        elif np.any(slice_seg == 1):
            label = "normal"
        else:
            continue  # Skip slices with none of the labels 1, 2, 3

        # Normalize the image slice to 0-255 for saving as PNG; the small
        # epsilon avoids a division by zero on constant slices.
        slice_norm = 255 * (slice_img - np.min(slice_img)) / (np.ptp(slice_img) + 1e-5)
        slice_norm = slice_norm.astype(np.uint8)

        try:
            # Convert to image format (PIL)
            img_pil = Image.fromarray(slice_norm)

            # Encode to memory first and validate, so a slice that cannot be
            # encoded never leaves a broken file on disk.
            buf = io.BytesIO()
            img_pil.save(buf, format="PNG")
            buf.seek(0)
            test_img = Image.open(buf)
            test_img.verify()  # Validate image

            # Create filename and save to disk
            filename = f"{case_name}_slice_{i:03d}.png"
            filepath = os.path.join(output_dir, label, filename)
            img_pil.save(filepath)

        except Exception as e:
            print(f"❌ Error saving slice {i} from {case_folder}: {e}")



# Cell 5: Saving Slices from All Cases

Make sure `DATASET_DIR` and `OUTPUT_DIR` are defined before running the export loop.

In [None]:
# Folder that holds one sub-folder per case (named case_*); the export loop reads from it.
DATASET_DIR = "kits23/dataset"
# Folder that receives the exported PNG slices in its normal/ and abnormal/ sub-folders.
OUTPUT_DIR = "slices_output1"


This code loops through all patient folders whose names start with "case_" and calls the `save_slices()` function defined earlier on each of them.

In [None]:
import io  # Not used in this cell; kept so the name stays available to other cells.
import os  # Imported here as well so the cell does not depend on an earlier cell.

from tqdm import tqdm

# Collect the case directories in a reproducible (sorted) order. Stray files
# whose names happen to start with "case_" are ignored.
cases = sorted(
    case_path
    for case_path in (
        os.path.join(DATASET_DIR, entry)
        for entry in os.listdir(DATASET_DIR)
        if entry.startswith("case_")
    )
    if os.path.isdir(case_path)
)

# Remember every case that could not be exported so the final message is honest.
failed_cases = []

print("Saving slices into normal / abnormal folders...")
for case_folder in tqdm(cases):
    try:
        save_slices(case_folder, OUTPUT_DIR)
    except FileNotFoundError as e:
        failed_cases.append(case_folder)
        print(f"⚠️ Skipping {case_folder}: File not found - {e}")
    except Exception as e:
        # Best effort: one broken case must not stop the whole export.
        failed_cases.append(case_folder)
        print(f"❌ Error saving slices from {case_folder}: {e}")

if failed_cases:
    print(f"⚠️ Finished, but {len(failed_cases)} of {len(cases)} cases were skipped or failed: {failed_cases}")
else:
    print("✅ Done saving slices.")


Saving slices into normal / abnormal folders...


100%|██████████| 489/489 [1:15:11<00:00,  9.23s/it]

✅ Done saving slices.





# Cell 6–7: Zip and Download Slices

Compresses the entire output folder into a zip file.

In [None]:
import shutil

# Name the archive after OUTPUT_DIR instead of repeating the folder name as a
# literal, so the zip cannot drift from the folder that was actually exported.
# Writes "<OUTPUT_DIR>.zip"; the returned archive path is shown as cell output.
shutil.make_archive(OUTPUT_DIR, "zip", root_dir=OUTPUT_DIR)


'/content/slices_output.zip'

Enables downloading the zip to your local machine (Google Colab only)

In [None]:
# NOTE(review): google.colab is presumably only importable inside Google Colab,
# so this cell is expected to fail in other Jupyter environments - confirm.
from google.colab import files
# Hands the archive to files.download; the file name must match the zip
# written by the archive cell.
files.download('slices_output1.zip')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Cell 8: Count Number of Images

Simple summary of how many "normal" and "abnormal" slices were saved.

In [None]:
# Count the directory entries in each class folder of the export.
slice_counts = {
    class_name: len(os.listdir(os.path.join(OUTPUT_DIR, class_name)))
    for class_name in ("normal", "abnormal")
}
normal_count = slice_counts["normal"]
abnormal_count = slice_counts["abnormal"]

# Report the totals, one class per line.
print("✅ Total slices saved:")
print(f"Normal: {normal_count}")
print(f"Abnormal: {abnormal_count}")


✅ Total slices saved:
Normal: 54819
Abnormal: 41703
