In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import cv2
import multiprocessing
import csv

from PIL import Image
from collections import Counter
from functools import partial
from tqdm import tqdm
from pathlib import Path

In [None]:
# Source tree with the original images, and the tree that receives the results.
data_folder = './data'
converted_folder = './converted_images'

# pathlib views of the same two locations, used by the Path-based helpers.
INPUT_DIR, OUTPUT_DIR = Path(data_folder), Path(converted_folder)

# Make sure the output root exists before anything is written into it.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
def convert_image(input_path, output_base=OUTPUT_DIR):
    """
    Convert a 16-bit image to 8-bit using direct scaling.

    The result is written below ``output_base`` at the same relative location
    the input has below ``INPUT_DIR``, so the directory structure is preserved.

    Args:
        input_path: Path to the input image file (str or Path); it must be
            located inside ``INPUT_DIR``.
        output_base: Base directory for output (str or Path).

    Returns:
        True if the 8-bit image was written, False if the image could not be
        loaded, is not ``uint16``, or could not be written.

    Raises:
        ValueError: if ``input_path`` is not located under ``INPUT_DIR``
            (raised by ``Path.relative_to``).
    """
    # Accept both strings and Path objects
    input_path = Path(input_path)
    output_base = Path(output_base)

    # Create relative path to maintain directory structure
    rel_path = input_path.relative_to(INPUT_DIR)
    output_path = output_base / rel_path
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Load image preserving depth (without IMREAD_UNCHANGED OpenCV returns 8-bit data)
    img = cv2.imread(str(input_path), cv2.IMREAD_UNCHANGED)

    if img is None:
        print(f"Failed to load image: {input_path}")
        return False

    # Check if it's 16-bit
    if img.dtype != np.uint16:
        print(f"Skipping non-16-bit image: {input_path}")
        return False

    # Scale down to 8-bit: from 0–65535 to 0–255.
    # On uint16 data a right shift by 8 equals floor division by 256 and
    # avoids the float64 temporary that `img / 256` would allocate.
    img_8bit = (img >> 8).astype(np.uint8)

    # Save the converted image; imwrite reports failure through its return
    # value, so a False result must not be reported as a success.
    if not cv2.imwrite(str(output_path), img_8bit):
        print(f"Failed to write image: {output_path}")
        return False

    print(f"Converted: {input_path} -> {output_path}")
    return True

In [None]:
def create_file_list():
    """
    Walk ``data_folder``, mirror its directory tree under ``converted_folder``
    and collect every image file found.

    Returns:
        A list of ``(source_path, target_dir)`` string tuples, where
        ``target_dir`` is the output directory that corresponds to the
        directory containing ``source_path``.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.tif', '.tiff', '.bmp')
    collected = []

    for current_dir, _subdirs, names in os.walk(data_folder):
        # Mirror the directory currently being visited in the output folder
        relative = os.path.relpath(current_dir, data_folder)
        mirror_dir = converted_folder if relative == '.' else os.path.join(converted_folder, relative)
        os.makedirs(mirror_dir, exist_ok=True)

        # Keep only files whose extension (case-insensitive) marks them as images
        collected.extend(
            (os.path.join(current_dir, name), mirror_dir)
            for name in names
            if name.lower().endswith(image_suffixes)
        )

    return collected

In [None]:
# Get file list and create subdirectories.
# image_files is a list of (source_path, target_dir) tuples; the call also
# creates the mirrored directory tree under the output folder.
image_files = create_file_list()
print(f"Found {len(image_files)} image files")

In [None]:
# Scan all images and identify 16-bit ones
def identify_16bit_images(image_files):
    """
    Classify every listed image as 16-bit or not by inspecting it with Pillow.

    An image counts as 16-bit when its Pillow mode is "I;16" or "RGB;16", or
    when the opened image exposes a ``bits`` attribute equal to 16. Everything
    else that opens successfully is labelled "8-bit".

    NOTE(review): Pillow documents further 16-bit modes (I;16L, I;16B, I;16N);
    they are labelled "8-bit" here - confirm whether that is intended.

    Args:
        image_files: iterable of ``(source_path, target_dir)`` tuples.

    Returns:
        ``(images_to_convert, all_bit_depths)``: the tuples of the images
        detected as 16-bit, and one label per scanned file
        ("16-bit", "8-bit" or "error").
    """
    sixteen_bit_modes = ("I;16", "RGB;16")
    images_to_convert = []
    all_bit_depths = []

    print("Scanning images to identify 16-bit files...")
    for source_path, target_dir in tqdm(image_files, desc="Scanning images"):
        file_name = os.path.basename(source_path)

        try:
            with Image.open(source_path) as img:
                is_16bit = img.mode in sixteen_bit_modes or getattr(img, "bits", None) == 16
                if is_16bit:
                    images_to_convert.append((source_path, target_dir))
                all_bit_depths.append("16-bit" if is_16bit else "8-bit")
        except Exception as e:
            # Unreadable files are reported and labelled, but do not stop the scan
            print(f"Error scanning {file_name}: {e}")
            all_bit_depths.append("error")

    return images_to_convert, all_bit_depths

In [None]:
# Phase 1: Identify all 16-bit images.
# before_bit_depths holds one label per scanned file ("16-bit", "8-bit" or "error").
images_to_convert, before_bit_depths = identify_16bit_images(image_files)
print(f"Found {len(images_to_convert)} 16-bit images to convert")

In [None]:
# CSV helpers: persist and reload the list of images that need conversion
def save_images_to_csv(images_to_convert, output_file='images_to_convert.csv'):
    """
    Write the source paths of the images to convert into a one-column CSV.

    Args:
        images_to_convert: iterable of ``(source_path, target_dir)`` pairs;
            only the source path (str or Path) is stored.
        output_file: path of the CSV file to create or overwrite.

    Returns:
        ``output_file``, unchanged.
    """
    with open(output_file, 'w', newline='') as handle:
        writer = csv.writer(handle)
        writer.writerow(['source_path'])  # single-column header
        # Backslashes become forward slashes so the stored paths look the same on every OS
        writer.writerows(
            [str(src).replace('\\', '/')]
            for src, _target in images_to_convert
        )
    return output_file


def load_images_from_csv(csv_file='images_to_convert.csv'):
    """
    Read image paths back from a one-column CSV file.

    The first row is treated as a header and skipped. Blank rows and rows
    whose first cell is empty or whitespace-only are ignored, because an
    empty string would otherwise turn into ``Path('.')``.

    Args:
        csv_file: path of the CSV file to read.

    Returns:
        A list of ``Path`` objects, one per data row, in file order. An empty
        file (not even a header) yields an empty list.

    Raises:
        OSError: if ``csv_file`` cannot be opened.
    """
    with open(csv_file, 'r', newline='') as csvfile:
        csv_reader = csv.reader(csvfile)
        # Skip header; the default keeps an empty file from raising StopIteration
        next(csv_reader, None)
        return [Path(row[0]) for row in csv_reader if row and row[0].strip()]

# Persist the list of 16-bit images found by the scan, so the conversion
# step can later be driven from the CSV file.
csv_file = save_images_to_csv(images_to_convert)
print(f"Saved list of {len(images_to_convert)} 16-bit images to {csv_file}")

In [None]:
# Convert every image listed in the CSV and keep track of the successes
def convert_images_from_csv(csv_file='images_to_convert.csv', output_dir=OUTPUT_DIR):
    """
    Convert all images whose paths are stored in ``csv_file``.

    Args:
        csv_file: CSV file read with ``load_images_from_csv``.
        output_dir: base output directory handed to ``convert_image``.

    Returns:
        ``(converted_count, converted_paths)``: the number of images for which
        ``convert_image`` reported success, and their paths as strings.
    """
    image_paths = load_images_from_csv(csv_file)
    print(f"Loaded {len(image_paths)} image paths from {csv_file}")

    converted_paths = []

    for i, path in enumerate(tqdm(image_paths, desc="Converting images")):
        if convert_image(path, output_dir):
            converted_paths.append(str(path))

        # Report after every 100th image and once more after the last one
        processed = i + 1
        if processed == len(image_paths) or processed % 100 == 0:
            print(f"Progress: {i + 1}/{len(image_paths)}")

    converted_count = len(converted_paths)
    print(f"Conversion complete. Converted {converted_count} out of {len(image_paths)} images.")
    return converted_count, converted_paths

In [None]:
# Run the conversion, either from an already saved CSV list or from a fresh
# scan, and record the before/after bit-depth counts.
# Every branch defines converted_count, converted_paths and after_count.
use_existing_csv = True
csv_file = 'images_to_convert.csv'

if use_existing_csv and os.path.exists(csv_file):
    print(f"Using existing CSV file: {csv_file}")
    # Convert images directly from the CSV
    converted_count, converted_paths = convert_images_from_csv(csv_file)

    # We only know about converted images in this case
    before_count = {'16-bit': len(converted_paths), '8-bit': 0}
    after_count = {'8-bit': converted_count}
else:
    # Scan for 16-bit images and save to CSV
    print("Scanning for 16-bit images...")
    image_files = create_file_list()
    images_to_convert, before_bit_depths = identify_16bit_images(image_files)
    before_count = Counter(before_bit_depths)

    # Save paths to CSV
    csv_file = save_images_to_csv(images_to_convert)
    print(f"Saved {len(images_to_convert)} 16-bit image paths to {csv_file}")

    # Ask user whether to convert now or later
    # (input() blocks, so an unattended "Run All" stops here if this branch is taken)
    convert_now = input("Convert images now? (y/n): ").lower() == 'y'

    if convert_now:
        converted_count, converted_paths = convert_images_from_csv(csv_file)
        after_count = {'8-bit': converted_count}
    else:
        print(f"Images paths saved to {csv_file}. Run the notebook later to convert them.")
        # Set default values if not converting now; converted_paths is set
        # too, so code that reads it does not hit a NameError on this branch
        converted_count = 0
        converted_paths = []
        after_count = {'8-bit': 0}

In [None]:
def copy_8bit_images(image_files):
    """
    Copy every image that is not 16-bit into its mirrored output directory.

    An image is treated as 16-bit with the same test identify_16bit_images
    applies (mode "I;16"/"RGB;16", or a ``bits`` attribute equal to 16), so a
    file that was selected for conversion is never overwritten here by its
    unconverted original.

    Files are copied byte-for-byte with ``shutil.copy2`` rather than being
    re-saved through Pillow, which would re-encode them (lossy for JPEG) and
    could drop metadata.

    Args:
        image_files: iterable of ``(source_path, target_dir)`` tuples.

    Returns:
        The number of files copied. Files that cannot be opened or copied are
        reported with a message and skipped.
    """
    import shutil  # local import: only this function needs it

    copied_count = 0
    for source_path, target_dir in tqdm(image_files, desc="Copying 8-bit images"):
        try:
            # Open the file only to read its mode / bit depth
            with Image.open(source_path) as img:
                is_16bit = img.mode in ["I;16", "RGB;16"] or getattr(img, "bits", None) == 16
            if is_16bit:
                continue

            target_path = os.path.join(target_dir, os.path.basename(source_path))
            shutil.copy2(source_path, target_path)
            copied_count += 1
        except Exception as e:
            # Best effort: report the failure and carry on with the next file
            print(f"Failed to copy {source_path}: {e}")
    return copied_count


In [None]:
# After scanning and converting 16-bit images: copy the remaining
# (non-16-bit) images into the output tree so it mirrors the input tree.
copy_8bit_images(image_files)
