In [1]:
import os
import shutil
import pandas as pd

In [2]:
# Source folders that currently hold the mixed train/valid volumes
# (placeholder paths -- point these at the real dataset location).
image_dir = 'dataset/1_Train,Valid_Image'
mask_dir = 'dataset/2_Train,Valid_Mask'

# Output folders, one per (kind, split) combination (placeholder paths as well).
image_train_dir = 'dataset/Image_Train_Data'
image_valid_dir = 'dataset/Image_Valid_Data'
mask_train_dir = 'dataset/Mask_Train_Data'
mask_valid_dir = 'dataset/Mask_Valid_Data'

# Make sure every output folder exists; exist_ok keeps a re-run of this cell harmless.
for _out_dir in (image_train_dir, image_valid_dir, mask_train_dir, mask_valid_dir):
    os.makedirs(_out_dir, exist_ok=True)

In [3]:
# Load the train/valid assignment table.
# The 'id' column is read as str: pandas would otherwise infer an integer dtype for
# purely numeric ids (dropping leading zeros), and those values would never compare
# equal to the string ids that are parsed out of file names further down.
train_valid_split_df = pd.read_csv('dataset/TrainValid_split.csv', dtype={'id': str})

# Scan ids assigned to the 'Train' group
train_ids = set(train_valid_split_df.loc[train_valid_split_df['group'] == 'Train', 'id'])

# Scan ids assigned to the 'Valid' group
valid_ids = set(train_valid_split_df.loc[train_valid_split_df['group'] == 'Valid', 'id'])

# Every scan id listed in either group
scan_ids = train_ids.union(valid_ids)

In [4]:
def move_files(src_dir, train_dst_dir, valid_dst_dir, file_ids,
               train_id_set=None, valid_id_set=None):
    """Move ``<file_id>.nii.gz`` files out of ``src_dir`` into the train or valid folder.

    Args:
        src_dir: Directory that currently contains the files.
        train_dst_dir: Destination for files whose scan id is in the train set.
        valid_dst_dir: Destination for files whose scan id is in the valid set.
        file_ids: Iterable of file stems (file name without the ``.nii.gz`` suffix).
        train_id_set: Scan ids belonging to the train split. Defaults to the
            notebook-level ``train_ids``.
        valid_id_set: Scan ids belonging to the valid split. Defaults to the
            notebook-level ``valid_ids``.

    Returns:
        None. Files are moved on disk as a side effect.

    Stems with no matching file in ``src_dir`` are skipped silently. Files whose
    scan id is in neither split are reported with ``print`` and left in place.
    """
    # Fall back to the notebook-level split sets when none are passed explicitly.
    if train_id_set is None:
        train_id_set = train_ids
    if valid_id_set is None:
        valid_id_set = valid_ids

    for file_id in file_ids:
        src_file_path = os.path.join(src_dir, file_id + '.nii.gz')
        if not os.path.isfile(src_file_path):
            continue

        # The scan id is everything before the first underscore (drops e.g. '_label').
        check_id = file_id.split("_")[0]

        if check_id in train_id_set:
            dst_dir = train_dst_dir
        elif check_id in valid_id_set:
            dst_dir = valid_dst_dir
        else:
            # Unknown id: report it and skip. Without the skip, dst_dir would be
            # unbound (first iteration) or stale from the previous file, sending
            # this file to the wrong split.
            print(file_id, "!!!")
            continue

        # dst_dir is an existing directory, so the file keeps its name inside it.
        shutil.move(src_file_path, dst_dir)

def get_unique_scan_ids(image_dir, mask_dir):
    """Collect the scan ids present in the image and mask directories.

    Image files are expected to be named ``<id>.nii.gz`` and mask files
    ``<id>_label.nii.gz``. The id is obtained by removing exactly that suffix, so
    ids containing a dot are kept whole and ``<id> + '.nii.gz'`` (or
    ``<id> + '_label.nii.gz'``) reproduces the original file name.

    Args:
        image_dir: Directory containing the image volumes.
        mask_dir: Directory containing the mask volumes.

    Returns:
        A set with the ids found in either directory. Files without the
        expected suffix are ignored.
    """
    image_suffix = '.nii.gz'
    mask_suffix = '_label.nii.gz'

    image_ids = {
        name[:-len(image_suffix)]
        for name in os.listdir(image_dir)
        if name.endswith(image_suffix)
    }
    # Masks lacking the '_label' marker are ignored: no id can be derived from them
    # that would map back to their file name.
    mask_ids = {
        name[:-len(mask_suffix)]
        for name in os.listdir(mask_dir)
        if name.endswith(mask_suffix)
    }
    return image_ids | mask_ids

In [None]:
# Collect every scan id that has a file in either source directory
scan_ids = get_unique_scan_ids(image_dir, mask_dir)

# Sort the image volumes into their 'Train' / 'Valid' folders
move_files(image_dir, image_train_dir, image_valid_dir, scan_ids)

# Mask file names carry a '_label' suffix after the scan id, so build those stems
# first and then sort the masks the same way
mask_file_ids = {scan_id + '_label' for scan_id in scan_ids}
move_files(mask_dir, mask_train_dir, mask_valid_dir, mask_file_ids)

# NOTE: this cell assumes the dataset layout configured in the cells above; replace the placeholder paths with the real ones before running.
# It needs access to the filesystem and physically moves files, so running it again afterwards finds nothing left to move.


In [6]:
from glob import glob

In [10]:
# Sanity check: count the mask volumes that ended up in the training folder
train_mask_paths = glob("./dataset/Mask_Train_Data/*.nii.gz")
len(train_mask_paths)

800