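# This code organises the images in the CXR8 dataset into separate folders, one per disease class, creating each class folder as needed.
# Only images with a single finding label are copied; images with multiple labels (separated by '|') are skipped.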

# In [None]:
import pandas as pd
import os
import shutil

# Directory that currently holds all of the unsorted images
source_folder = '/media/ntu/volume1/home/s123md305_01/Documents/Dataset'

# Parent directory under which one folder per class label is created
root_folder = '/media/ntu/volume1/home/s123md305_01/Documents/Separated'

# Metadata table; its 'Image Index' and 'Finding Labels' columns drive the copy
df = pd.read_csv(
    '/media/ntu/volume1/home/s123md305_01/Documents/CXR8/Data_Entry_2017_v2020.csv'
)

# Ensure a folder exists, creating it (and any missing parents) when absent
def check_create_folder(path):
    """Create the directory *path*, including missing parents, if needed.

    ``exist_ok=True`` turns the call into a no-op when the directory is
    already present, so no separate existence check is required and there
    is no gap between "check" and "create" in which another process could
    create the directory and make ``os.makedirs`` fail.

    Args:
        path: Directory path that must exist after the call.

    Raises:
        FileExistsError: If *path* already exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

# Ensure the root folder for separated images exists
check_create_folder(root_folder)

# Copy every single-label image into a folder named after its label.
# The two columns are walked in lockstep with zip() rather than
# df.iterrows(): iterrows() builds a Series object for every row, which is
# needless overhead when only two plain values per row are used.
for image_name, labels in zip(df['Image Index'], df['Finding Labels']):
    # Entries containing '|' carry more than one label and therefore do not
    # belong to a single class folder; they are reported and skipped.
    if '|' in labels:
        print(f'Image {image_name} has multiple labels. Skipping.')
        continue

    # The destination folder is named after the (single) label
    dest_folder = os.path.join(root_folder, labels)
    check_create_folder(dest_folder)

    # Define source and destination paths
    source_path = os.path.join(source_folder, image_name)
    dest_path = os.path.join(dest_folder, image_name)

    # Report a missing source file instead of letting the copy raise
    if not os.path.isfile(source_path):
        print(f'File {source_path} does not exist. Skipping.')
        continue

    # Copy the image to the destination folder
    shutil.copy(source_path, dest_path)
    print(f'Copied {image_name} to {labels}/')

print('Done separating images.')
