In [1]:
import os
import shutil

# Function to rename and move the files
def rename_and_move_files(root_folder, destination_folder):
    """Flatten a two-level folder tree into ``destination_folder``.

    Looks exactly two directory levels below ``root_folder``. Every entry
    there whose name starts with ``combined_image`` is moved into
    ``destination_folder`` and renamed to
    ``<first-level folder>-<second-level folder>-<original name>``.
    Anything that is not a directory at the first or second level is ignored.

    Args:
        root_folder: Top of the tree to scan.
        destination_folder: Existing directory that receives the moved entries.
    """
    for outer_name in os.listdir(root_folder):
        outer_dir = os.path.join(root_folder, outer_name)
        if not os.path.isdir(outer_dir):
            continue  # only descend into directories

        for inner_name in os.listdir(outer_dir):
            inner_dir = os.path.join(outer_dir, inner_name)
            if not os.path.isdir(inner_dir):
                continue  # loose files at this level are left alone

            for entry in os.listdir(inner_dir):
                if not entry.startswith("combined_image"):
                    continue
                # Encode both folder names in the new name so it stays unique
                # once everything lives in one flat directory.
                flat_name = f"{outer_name}-{inner_name}-{entry}"
                shutil.move(
                    os.path.join(inner_dir, entry),
                    os.path.join(destination_folder, flat_name),
                )

# Main function
def main():
    """Gather the preprocessed images into one flat folder.

    Creates ``./restructure`` if it is missing, then passes the source tree
    and that folder to ``rename_and_move_files``.
    """
    # Change this to your destination directory
    flat_output_dir = "./restructure"
    # Create the destination folder if it doesn't exist
    os.makedirs(flat_output_dir, exist_ok=True)

    # Change this to your main directory
    preprocessing_root = "./rsna_preprocessing_output"
    rename_and_move_files(preprocessing_root, flat_output_dir)

# Run main() only when this code is executed as the top-level program,
# not when it is imported from another module.
if __name__ == "__main__":
    main()


In [4]:
import os
from collections import defaultdict
from PIL import Image

def combine_images_in_folder(folder_path, output_folder):
    """Merge each complete triplet of ``.jpg`` images in a folder into one RGB image.

    Files are grouped by the part of their name before the last ``-``. Every
    group that holds exactly three images is passed to ``combine_images`` and
    the result is written to
    ``<output_folder>/<prefix>-combined_rgb_image.jpg``. Groups of any other
    size are skipped.

    Args:
        folder_path: Directory containing the source ``.jpg`` files.
        output_folder: Directory for the merged images; created if missing.
    """
    # Dictionary to store image paths by their prefix
    image_paths = defaultdict(list)

    # os.listdir() returns names in arbitrary order. Sorting fixes the order of
    # the three paths inside each group, so the same file always feeds the same
    # colour channel (presumably x -> R, y -> G, z -> B if the names differ only
    # by an x/y/z suffix -- verify against the preprocessing step).
    for filename in sorted(os.listdir(folder_path)):
        if filename.endswith(".jpg"):
            prefix = filename.rsplit("-", 1)[0]  # Extract prefix from filename
            image_paths[prefix].append(os.path.join(folder_path, filename))

    # Saving into a missing directory would fail, so create it up front.
    os.makedirs(output_folder, exist_ok=True)

    # Combine images
    for prefix, paths in image_paths.items():
        if len(paths) != 3:
            continue  # incomplete (or over-full) group: nothing sensible to merge
        first_path, second_path, third_path = paths
        output_path = os.path.join(output_folder, f"{prefix}-combined_rgb_image.jpg")
        combine_images(first_path, second_path, third_path, output_path)

def combine_images(image_x_path, image_y_path, image_z_path, output_path):
    """Stack the first band of three images into one RGB image and save it.

    Band 0 of ``image_x_path`` becomes the red channel, band 0 of
    ``image_y_path`` the green channel and band 0 of ``image_z_path`` the blue
    channel. Works for true single-band ("L") images as well as for grayscale
    data stored in a multi-band file such as an RGB JPEG.

    Args:
        image_x_path: Path of the image used for the red channel.
        image_y_path: Path of the image used for the green channel.
        image_z_path: Path of the image used for the blue channel.
        output_path: Where the merged image is written.

    Raises:
        AssertionError: If the three images do not all have the same size.
    """
    # Context managers make sure the three source files are closed again.
    with Image.open(image_x_path) as image_x, \
            Image.open(image_y_path) as image_y, \
            Image.open(image_z_path) as image_z:
        # Ensure images have the same size
        assert image_x.size == image_y.size == image_z.size, "Images must have the same size"

        # getchannel(0) returns band 0 as a standalone single-band image. Unlike
        # getpixel(...)[0] it also works when the image has only one band, and
        # it copies the whole band at once instead of visiting every pixel
        # from Python.
        bands = [image.getchannel(0) for image in (image_x, image_y, image_z)]

    # Assemble the three bands as R, G, B.
    combined_image = Image.merge("RGB", bands)

    # Save the combined image
    combined_image.save(output_path)
    print(f"Combined image saved at {output_path}")

# Example usage: merge the image triplets found in ./restructure and write
# one combined RGB image per filename prefix into ./combined_rgb.
combine_images_in_folder("./restructure", "./combined_rgb")


Combined image saved at ./combined_rgb/32943-22788-combined_rgb_image.jpg
Combined image saved at ./combined_rgb/42134-37709-combined_rgb_image.jpg
Combined image saved at ./combined_rgb/44001-5460-combined_rgb_image.jpg
Combined image saved at ./combined_rgb/48681-13095-combined_rgb_image.jpg
Combined image saved at ./combined_rgb/732-26337-combined_rgb_image.jpg
Combined image saved at ./combined_rgb/37169-26343-combined_rgb_image.jpg
Combined image saved at ./combined_rgb/64469-34841-combined_rgb_image.jpg
Combined image saved at ./combined_rgb/12685-9181-combined_rgb_image.jpg
Combined image saved at ./combined_rgb/9813-24149-combined_rgb_image.jpg
Combined image saved at ./combined_rgb/56038-6467-combined_rgb_image.jpg
Combined image saved at ./combined_rgb/30515-12104-combined_rgb_image.jpg
Combined image saved at ./combined_rgb/59938-41040-combined_rgb_image.jpg
Combined image saved at ./combined_rgb/42602-34191-combined_rgb_image.jpg
Combined image saved at ./combined_rgb/63472

In [None]:
import os
import shutil
import pandas as pd
import random

# Read train.csv file
csv_file_path = 'train.csv'
df = pd.read_csv(csv_file_path)

# Check if required columns exist
required_columns = ['patient_id', 'any_injury']
if not all(column in df.columns for column in required_columns):
    raise ValueError("Required columns not found in the CSV file.")

# Seed the module-level RNG so the shuffle below (and any later random.random()
# draws) give the same result on every Restart & Run All.
SEED = 42
random.seed(SEED)

# Build the patient_id -> any_injury lookup once instead of filtering the whole
# frame for every image. Rows with a missing id/label are dropped and, if a
# patient is listed more than once, the first row wins.
labelled_rows = df.dropna(subset=required_columns).drop_duplicates(subset='patient_id')
label_by_patient = dict(zip(
    labelled_rows['patient_id'].astype(int).tolist(),
    labelled_rows['any_injury'].astype(int).tolist(),
))

# Extract patient IDs from filenames and create the filename -> label mapping
patient_id_mapping = {}
image_folder_path = './combined_rgb'  # Folder holding the combined RGB images
unlabelled_filenames = []
# sorted(): os.listdir() order is arbitrary, which would defeat the seed above.
for filename in sorted(os.listdir(image_folder_path)):
    if filename.endswith('.jpg'):
        parts = filename.split('-')
        patient_id = parts[0]
        try:
            patient_id_mapping[filename] = label_by_patient[int(patient_id)]
        except (ValueError, KeyError):
            # Name does not start with a numeric id, or the id has no usable
            # row in the CSV: there is no label, so the image cannot be used.
            unlabelled_filenames.append(filename)
if unlabelled_filenames:
    print(f"Warning: skipped {len(unlabelled_filenames)} image(s) without a label in {csv_file_path}.")

# Define paths for the dataset
dataset_path = 'dataset'
train_path = os.path.join(dataset_path, 'Train')
val_path = os.path.join(dataset_path, 'Validation')
test_path = os.path.join(dataset_path, 'Test')

# Create directories for the dataset
for path in [train_path, val_path, test_path]:
    os.makedirs(os.path.join(path, 'Any_injury_0'), exist_ok=True)
    os.makedirs(os.path.join(path, 'Any_injury_1'), exist_ok=True)

# Class imbalance handling: Count occurrences of each class
class_counts = {0: 0, 1: 0}
for label in patient_id_mapping.values():
    class_counts[label] += 1

# Determine undersampling ratio (minority / majority). With no images at all
# there is nothing to balance, so fall back to 1.0 instead of dividing by zero.
largest_class_count = max(class_counts.values())
undersampling_ratio = (
    min(class_counts.values()) / largest_class_count if largest_class_count else 1.0
)

# Group the files by patient so that every image of one patient ends up in the
# same split. Splitting file by file would let different series of the same
# patient (same any_injury label) leak between train, validation and test.
files_by_patient = {}
for filename in sorted(patient_id_mapping):
    files_by_patient.setdefault(filename.split('-')[0], []).append(filename)

# Shuffle and split patients 60/20/20 into train, validation, and test sets;
# the test set takes whatever remains after rounding.
patient_ids = list(files_by_patient)
random.shuffle(patient_ids)
train_patient_count = int(0.6 * len(patient_ids))
val_patient_count = int(0.2 * len(patient_ids))

train_filenames = [
    name
    for pid in patient_ids[:train_patient_count]
    for name in files_by_patient[pid]
]
val_filenames = [
    name
    for pid in patient_ids[train_patient_count:train_patient_count + val_patient_count]
    for name in files_by_patient[pid]
]
test_filenames = [
    name
    for pid in patient_ids[train_patient_count + val_patient_count:]
    for name in files_by_patient[pid]
]

# Kept for downstream use: the full shuffled file list and the real split sizes
filenames = train_filenames + val_filenames + test_filenames
train_size = len(train_filenames)
val_size = len(val_filenames)
test_size = len(test_filenames)

# Function to move images to appropriate folders and perform undersampling
def move_images(filenames, destination_path):
    """Copy images into per-class subfolders, randomly skipping some of them.

    Each file is looked up in the module-level ``patient_id_mapping`` and
    copied from ``image_folder_path`` to
    ``<destination_path>/Any_injury_<label>/``. A file is skipped when a fresh
    random draw exceeds ``undersampling_ratio`` *and* its class currently has a
    larger count than the smallest entry of ``class_counts``.

    Side effect: the module-level ``class_counts`` entry of the file's class is
    decremented for every file that is copied, so later decisions (including
    those in later calls) depend on what was copied before.

    Args:
        filenames: Image file names to process, in order.
        destination_path: Split directory that contains the
            ``Any_injury_<label>`` subfolders.
    """
    for image_name in filenames:
        injury_label = patient_id_mapping[image_name]
        origin = os.path.join(image_folder_path, image_name)

        # One random draw per file, taken before the count comparison.
        draw_exceeds_ratio = random.random() > undersampling_ratio
        skip_for_undersampling = (
            draw_exceeds_ratio
            and class_counts[injury_label] > min(class_counts.values())
        )

        if not skip_for_undersampling:
            target = os.path.join(destination_path, f'Any_injury_{injury_label}', image_name)
            shutil.copy(origin, target)
            class_counts[injury_label] -= 1

# Fill the split folders one after another: train, then validation, then test
for split_filenames, split_path in (
    (train_filenames, train_path),
    (val_filenames, val_path),
    (test_filenames, test_path),
):
    move_images(split_filenames, split_path)

print("Dataset creation completed.")
