In [7]:
import os
import shutil
import pandas as pd

# Input tree that is read from, and the folder the reorganized copy is written to
old_dataset_dir = '/kaggle/input/newdataset1234/train_images'
output_dataset_dir = '/kaggle/working/renamed_images'

# CSV that pairs each series_id with its series_description
df_series_description = pd.read_csv("/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/train_series_descriptions.csv")

# Normalize the descriptions in two passes: every space becomes an underscore,
# then every "_T2" becomes "-T2"
underscored_descriptions = df_series_description['series_description'].str.replace(" ", "_")
normalized_descriptions = underscored_descriptions.str.replace("_T2", "-T2")

# Lookup table: series_id -> normalized description
series_description_map = {
    series_key: description
    for series_key, description in zip(df_series_description['series_id'], normalized_descriptions)
}

# Begin with an empty output tree: remove the directory if it is already
# there, then create it again
output_dir_present = os.path.exists(output_dataset_dir)
if output_dir_present:
    shutil.rmtree(output_dataset_dir)
os.makedirs(output_dataset_dir, exist_ok=True)

# Copy the <study_id>/<series_id>/<file> tree found under old_dataset_dir into
# output_dataset_dir as <study_id>/<series description>/<series_id>_<file>.
# Entries that cannot be processed are reported and skipped rather than
# aborting the run.
for study_id in os.listdir(old_dataset_dir):
    old_study_dir = os.path.join(old_dataset_dir, study_id)

    # Ensure we only process directories
    if not os.path.isdir(old_study_dir):
        print(f"Skipping non-directory item: {old_study_dir}")
        continue

    # Create the corresponding study directory in the output folder
    output_study_dir = os.path.join(output_dataset_dir, study_id)
    os.makedirs(output_study_dir, exist_ok=True)

    # Iterate over the series IDs in the study directory
    for series_id in os.listdir(old_study_dir):
        old_series_dir = os.path.join(old_study_dir, series_id)

        # Ensure we only process directories
        if not os.path.isdir(old_series_dir):
            print(f"Skipping non-directory item: {old_series_dir}")
            continue

        # The lookup uses the integer form of the folder name. A folder whose
        # name is not a number cannot be looked up, so treat it as "no
        # description" instead of letting int() raise and stop the whole run.
        try:
            series_description = series_description_map.get(int(series_id))
        except ValueError:
            series_description = None

        # Only a non-empty string can be turned into a folder name. A missing
        # value in the CSV would show up as a float NaN, which is truthy and
        # has no .replace(), so a plain truthiness check is not enough.
        if not isinstance(series_description, str) or not series_description:
            print(f"No description found for series_id: {series_id}, skipping...")
            continue

        # Collapse doubled underscores, then turn every underscore into a hyphen
        series_description = series_description.replace("__", "_").replace("_", "-")

        # NOTE(review): a description that contains "/" (e.g. "Sagittal-T2/STIR")
        # is joined as-is and therefore produces a nested "STIR" sub-folder.
        # Left unchanged so the existing output layout stays the same;
        # confirm that this nesting is intended.
        output_series_dir = os.path.join(output_study_dir, series_description)
        os.makedirs(output_series_dir, exist_ok=True)

        # Copy each file from the old series directory to the new one
        for file_name in os.listdir(old_series_dir):
            src_file_path = os.path.join(old_series_dir, file_name)

            # shutil.copy handles files only; a nested folder would raise
            if not os.path.isfile(src_file_path):
                print(f"Skipping non-file item: {src_file_path}")
                continue

            # Prefix with the series ID so files from different series that
            # land in the same description folder keep distinct names
            new_file_name = f"{series_id}_{file_name}"
            dst_file_path = os.path.join(output_series_dir, new_file_name)
            shutil.copy(src_file_path, dst_file_path)
            print(f"Copied {src_file_path} to {dst_file_path}")

print("Renaming and reorganization complete!")


Copied /kaggle/input/newdataset1234/train_images/1737682527/1510698437/32.png to /kaggle/working/renamed_images/1737682527/Axial-T2/1510698437_32.png
Copied /kaggle/input/newdataset1234/train_images/1972129014/2898623075/10.png to /kaggle/working/renamed_images/1972129014/Sagittal-T2/STIR/2898623075_10.png
Copied /kaggle/input/newdataset1234/train_images/1972129014/3324327485/14.png to /kaggle/working/renamed_images/1972129014/Axial-T2/3324327485_14.png
Copied /kaggle/input/newdataset1234/train_images/1972129014/3324327485/18.png to /kaggle/working/renamed_images/1972129014/Axial-T2/3324327485_18.png
Copied /kaggle/input/newdataset1234/train_images/1972129014/3324327485/7.png to /kaggle/working/renamed_images/1972129014/Axial-T2/3324327485_7.png
Copied /kaggle/input/newdataset1234/train_images/1972129014/3324327485/3.png to /kaggle/working/renamed_images/1972129014/Axial-T2/3324327485_3.png
Copied /kaggle/input/newdataset1234/train_images/1972129014/3324327485/8.png to /kaggle/working/