In [None]:
import os
import csv
import json
import shutil
import pandas as pd
from tqdm import tqdm
import numpy as np
from PIL import Image
from sklearn.model_selection import KFold
from PIL import Image, UnidentifiedImageError
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import GroupShuffleSplit

IF IMAGES ARE .WEBP, RUN THE CELL BELOW (CONVERTS THEM TO .PNG AND DELETES THE ORIGINAL .WEBP FILES)

In [None]:
def convert_webp_to_png(directory):
    """Convert every WEBP image under ``directory`` to PNG and delete the originals.

    The directory tree is walked recursively. Each file whose name ends in
    ``.webp`` (matched case-insensitively) is re-saved next to itself with a
    ``.png`` extension, after which the source file is removed.

    Args:
        directory: Root folder to search.
    """
    for subdir, dirs, files in os.walk(directory):
        for file in files:
            # Match case-insensitively so names such as "IMG.WEBP" are not skipped
            if not file.lower().endswith('.webp'):
                continue

            webp_path = os.path.join(subdir, file)
            png_path = os.path.join(subdir, os.path.splitext(file)[0] + '.png')

            # Open the WEBP and convert it to PNG; the context manager closes the
            # source file so it is no longer held open when it is deleted below
            with Image.open(webp_path) as image:
                image.save(png_path, 'PNG')
            print(f"Converted {webp_path} to {png_path}")

            # Remove the original WEBP file
            os.remove(webp_path)
            print(f"Removed original {webp_path}")

# Define your image directory (replace the placeholder with the real dataset path)
base_dir = "your/image/dataset/directory"

# Convert all WEBP images in the base directory.
# os.walk() yields nothing for a path that does not exist, so a wrong path would
# otherwise finish silently without converting anything; report it instead.
if os.path.isdir(base_dir):
    convert_webp_to_png(base_dir)
else:
    print(f"Directory not found, nothing converted: {base_dir}")

IF IMAGES ARE .BMP, RUN THE CELL BELOW (CONVERTS THEM TO .PNG AND DELETES THE ORIGINAL .BMP FILES)

In [None]:
def convert_bmp_to_png(directory):
    """Re-save every BMP file found beneath ``directory`` as PNG, then delete the BMP.

    The search is recursive and the ``.bmp`` extension is matched without regard
    to case. Each PNG is written into the same folder as its source file.

    Args:
        directory: Root folder to search.
    """
    for folder, _subfolders, filenames in os.walk(directory):
        bitmap_names = [name for name in filenames if name.lower().endswith('.bmp')]

        for name in bitmap_names:
            stem = os.path.splitext(name)[0]
            bmp_path = os.path.join(folder, name)
            png_path = os.path.join(folder, stem + '.png')

            # Decode the bitmap and write it back out as PNG
            Image.open(bmp_path).save(png_path, 'PNG')
            print(f"Converted {bmp_path} to {png_path}")

            # The source bitmap is dropped once the PNG has been written
            os.remove(bmp_path)
            print(f"Removed original {bmp_path}")

# Define your image directory (replace the placeholder with the real dataset path)
base_dir = "your/image/dataset/directory"

# Run conversion.
# os.walk() yields nothing for a path that does not exist, so a wrong path would
# otherwise finish silently without converting anything; report it instead.
if os.path.isdir(base_dir):
    convert_bmp_to_png(base_dir)
else:
    print(f"Directory not found, nothing converted: {base_dir}")

CREATE MAPPING CSV

In [None]:
# Set your image directory here
image_dir = "your/image/dataset/directory"
csv_file_path = os.path.join(image_dir, "mapping.csv")

# Find the .png files that sit directly in image_dir (sub-folders are not searched).
# The extension is matched case-insensitively, and the names are sorted because
# os.listdir() returns entries in arbitrary order; sorting keeps the CSV reproducible.
image_filenames = sorted(f for f in os.listdir(image_dir) if f.lower().endswith(".png"))

# Write the CSV: one row per image, with the label column left empty
with open(csv_file_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['dataset_type', 'image_path', 'label_path'])

    for img in image_filenames:
        img_path = os.path.join(image_dir, img)
        if os.path.exists(img_path):
            writer.writerow(["scleral_lens_photo", img_path, ""])
        else:
            print(f"Image not found: {img_path}")

print(f"CSV file created at {csv_file_path}")

CUNEX PREP

In [None]:
# Root of the nnU-Net raw dataset folder (replace the placeholder with your own path)
output_base_path = "your_working_directory/nnunet/dataset/nnunet_raw/nnunet_raw_data/Dataset001_name"

# Folder that receives the converted images. The processing cells further down read
# `output_image_dir`, but no cell defined it, so it is bound here.
# NOTE(review): assumed to be imagesTs because that is the only folder this cell
# creates — confirm.
output_image_dir = os.path.join(output_base_path, 'imagesTs')
os.makedirs(output_image_dir, exist_ok=True)

In [None]:
# Label mapping: each key names the row column that holds a mask path, and each
# value is the integer written into the label map wherever that mask is non-zero.
label_mapping = dict(label_path=1)

def visualize_and_save(image_path, label_path, output_path):
    """Save a 50/50 blend of an image and a colour-coded rendering of its label map.

    Label values 1, 2, 3 and 4 are painted red, green, blue and yellow; every
    other value (including background) stays black in the overlay.

    Note: a later cell in this notebook defines another ``visualize_and_save``
    with a two-parameter signature, which replaces this one once that cell runs.

    Args:
        image_path: Path of the image to draw on.
        label_path: Path of the label map whose pixel values are class indices.
        output_path: Where the blended visualisation is written.
    """
    # Class index -> RGB colour; background is intentionally absent
    class_colors = {
        1: (255, 0, 0),    # red
        2: (0, 255, 0),    # green
        3: (0, 0, 255),    # blue
        4: (255, 255, 0),  # yellow
    }

    image = Image.open(image_path)
    label = Image.open(label_path)
    label_values = np.array(label)

    # Start from an all-black RGB canvas with the label map's shape plus a channel axis
    overlay = np.zeros(label_values.shape + (3,), dtype=np.uint8)
    for class_id, rgb in class_colors.items():
        overlay[label_values == class_id] = rgb

    # Both layers are converted to RGBA before blending
    base_rgba = image.convert('RGBA')
    overlay_rgba = Image.fromarray(overlay).convert('RGBA')
    blended = Image.blend(base_rgba, overlay_rgba, alpha=0.5)

    blended.save(output_path)

# Function to process and copy images
def process_and_copy(row, index, is_test=False):
    """Write one row's image and label map into the nnU-Net style folder layout.

    The source image is converted to grayscale (mode ``'L'``) and saved as
    ``kera_<index>_0000.png`` under ``imagesTr`` (``imagesTs`` when ``is_test``).
    A label map is built from the mask columns named in ``label_mapping`` and
    saved as ``kera_<index>.png`` under ``labelsTr``/``labelsTs``. For indices
    below 10 an overlay is also written to ``visTr``/``visTs``. The destination
    folders are created on demand below ``output_base_path``.

    NOTE(review): the overlay call passes three arguments to
    ``visualize_and_save``; a later cell redefines that function with two
    parameters (and redefines ``process_and_copy`` itself without parameters) —
    confirm which definitions are meant to be live when this runs.

    Args:
        row: Record with an ``image_path`` entry plus one entry per key of
            ``label_mapping``. A mask entry that is ``'blank'``, empty or
            missing (NaN/None) means "no mask".
        index: Running number used to build the output file name.
        is_test: Write into the ``*Ts`` folders instead of the ``*Tr`` folders.

    Returns:
        ``(input_image_path, new_name)`` on success, or ``(None, None)`` when a
        file could not be read or written.
    """
    input_image_path = row['image_path']
    try:
        new_name = f'kera_{index:05d}'
        suffix = 'Ts' if is_test else 'Tr'

        # Create the destination folders up front; without them every save
        # below would fail and be reported as a skipped file
        images_dir = os.path.join(output_base_path, 'images' + suffix)
        labels_dir = os.path.join(output_base_path, 'labels' + suffix)
        os.makedirs(images_dir, exist_ok=True)
        os.makedirs(labels_dir, exist_ok=True)

        # Open the input image and convert to grayscale if necessary
        input_image = Image.open(input_image_path)
        if input_image.mode != 'L':  # 'L' mode is for grayscale images
            input_image = input_image.convert('L')

        output_image_path = os.path.join(images_dir, new_name + '_0000.png')
        input_image.save(output_image_path)

        # PIL reports size as (width, height) while NumPy arrays are indexed
        # (height, width); swap the axes so the label map is not transposed
        width, height = input_image.size
        label_image = np.zeros((height, width), dtype=np.uint8)

        for mask, label_index in label_mapping.items():
            mask_path = row[mask]
            # Skip rows that carry no mask for this label
            if pd.isna(mask_path) or mask_path in ('', 'blank'):
                continue

            annotation = Image.open(mask_path).convert('L')  # Convert to grayscale
            annotation_array = np.array(annotation)

            # If the mask's dimensions differ from the image, restart the label
            # map at the mask's shape (labels painted so far are discarded)
            if annotation_array.shape != label_image.shape:
                label_image = np.zeros(annotation_array.shape, dtype=np.uint8)

            label_image[annotation_array > 0] = label_index

        label_image_path = os.path.join(labels_dir, new_name + '.png')
        Image.fromarray(label_image).save(label_image_path)

        # Visualization (for a subset of images)
        if index < 10:
            vis_dir = os.path.join(output_base_path, 'vis' + suffix)
            os.makedirs(vis_dir, exist_ok=True)
            vis_output_path = os.path.join(vis_dir, new_name + '.png')
            visualize_and_save(output_image_path, label_image_path, vis_output_path)

        return input_image_path, new_name

    except (IOError, FileNotFoundError, UnidentifiedImageError) as e:
        print(f"Error processing file: {input_image_path}. Error: {e}. Skipping.")
        return None, None

In [None]:
# Folder that holds the source images (replace the placeholder with your own path)
input_dir = "your/image/dataset/directory"

# The processing cells below iterate over `input_dirs`, which no cell defined;
# wrap the single folder in a list so that name exists. Add further folders to
# this list to process several datasets in one run.
input_dirs = [input_dir]

In [None]:
def visualize_and_save(image_path, output_path):
    """Write a grayscale (mode ``'L'``) copy of ``image_path`` to ``output_path``."""
    # Open, force single-channel grayscale, and save in one pass
    Image.open(image_path).convert('L').save(output_path)
    
def process_and_copy():
    """Copy every image under ``input_dirs`` into ``output_image_dir`` as grayscale PNG.

    Reads the module-level names ``input_dirs`` (iterable of folders) and
    ``output_image_dir`` (destination folder, created if missing). Each folder
    is walked recursively; files with a recognised image extension are
    converted to mode ``'L'`` and saved as ``kera_<index>_0000.png``, where
    ``index`` counts the images saved so far. Files that cannot be read are
    reported and skipped without consuming an index.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')
    os.makedirs(output_image_dir, exist_ok=True)
    index = 0

    for input_dir in input_dirs:
        for root, dirs, files in os.walk(input_dir):
            # os.walk visits sub-folders in arbitrary order; sorting the list in
            # place fixes the traversal order so numbering is reproducible
            dirs.sort()
            for file in sorted(files):
                if not file.lower().endswith(image_extensions):
                    continue

                image_path = os.path.join(root, file)
                new_name = f'kera_{index:05d}_0000.png'
                output_image_path = os.path.join(output_image_dir, new_name)

                try:
                    with Image.open(image_path) as img:
                        gray = img if img.mode == 'L' else img.convert('L')
                        gray.save(output_image_path)
                except (IOError, FileNotFoundError, UnidentifiedImageError) as e:
                    print(f"Error processing {image_path}: {e}")
                    continue

                # Advance only after a successful save; without this every image
                # would be written to kera_00000_0000.png and overwrite the last
                index += 1

In [None]:
# Process images
# NOTE(review): this cell reads combined_df, test_df, test_out, NUM_TEST_OUT and
# name_mapping_csv, none of which are defined in the cells of this notebook, and it
# calls process_and_copy(row, index, ...) — the three-parameter version. The
# zero-argument redefinition in the preceding cell replaces that version when the
# notebook runs top to bottom. Confirm the intended setup before running.
name_mapping = []
index = 0
for _, row in tqdm(combined_df.iterrows(), total=combined_df.shape[0]):
    original_path, new_name = process_and_copy(row, index)
    if original_path is not None:
        name_mapping.append([original_path, new_name])
        index += 1
        # NOTE(review): fixed cap of 100 here versus NUM_TEST_OUT below — confirm intent
        if test_out and index >= 100:
            break

# Process test images
for _, row in tqdm(test_df.iterrows(), total=test_df.shape[0]):
    original_path, new_name = process_and_copy(row, index, is_test=True)
    # process_and_copy returns (None, None) for a file it had to skip; such rows
    # must not be recorded or consume an index, matching the loop above
    if original_path is None:
        continue
    name_mapping.append([original_path, new_name])
    index += 1
    if test_out and index >= NUM_TEST_OUT:
        break

# Save name mapping
pd.DataFrame(name_mapping, columns=['OriginalPath', 'NNUNetName']).to_csv(name_mapping_csv, index=False)

In [None]:
name_mapping = []
index = 0

# Make sure the destination folder exists before the first save
os.makedirs(output_image_dir, exist_ok=True)

# Process all images: convert each one to grayscale PNG under a sequential name
for input_dir in input_dirs:
    for root, dirs, files in os.walk(input_dir):
        # os.walk visits sub-folders in arbitrary order; sorting the list in place
        # fixes the traversal order so the numbering is reproducible across runs
        dirs.sort()
        for file in tqdm(sorted(files), desc=f"Processing {input_dir}"):
            if not file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')):
                continue

            image_path = os.path.join(root, file)
            new_name = f'kera_{index:05d}_0000.png'
            output_path = os.path.join(output_image_dir, new_name)

            try:
                # The context manager closes the source file once it has been saved
                with Image.open(image_path) as img:
                    gray = img if img.mode == 'L' else img.convert('L')
                    gray.save(output_path)

                # Record the image and advance the counter only after a successful save
                name_mapping.append([image_path, new_name])
                index += 1

            except (IOError, FileNotFoundError, UnidentifiedImageError) as e:
                print(f"Error processing {image_path}: {e}")

# Save mapping CSV next to (one level above) the output image folder
mapping_csv_path = os.path.join(os.path.dirname(output_image_dir), "image_name_mapping.csv")
pd.DataFrame(name_mapping, columns=['OriginalPath', 'NNUNetName']).to_csv(mapping_csv_path, index=False)

print(f"Done. Saved {index} images.")
print(f"Name mapping saved to: {mapping_csv_path}")

In [None]:
# Create dataset.json inside the dataset root folder
dataset_json = dict(
    channel_names={"0": "0000"},
    labels={},  # No training, so no labels
    numTraining=0,
    file_ending=".png",
)

# Serialise with 4-space indentation
with open(os.path.join(output_base_path, 'dataset.json'), 'w') as f:
    json.dump(obj=dataset_json, fp=f, indent=4)