### Copy DICOM files from multiple directories into one directory per patient

In [2]:
import os
import re
import shutil
import pydicom
import numpy as np
import pandas as pd

# Input and output locations, both built on top of the current working directory.
# NOTE(review): the first literal already carries a drive letter, so on Windows
# os.path.join is expected to drop the cwd prefix for img_dir - confirm intended.
_cwd = os.getcwd()
img_dir = os.path.join(_cwd, "Z:\\CHAVI\\DRAW BREAST RE EXPORT")
output_dir = os.path.join(_cwd, "imgdata\\DRAW BREAST-V2")

In [14]:
def copy_dcm(root_dir, output_dir):
    """Copy CT and RTSTRUCT DICOM files into one folder per patient.

    Walks ``root_dir`` recursively, reads every ``.dcm`` file and, when its
    Modality is ``CT`` or ``RTSTRUCT``, copies it to
    ``output_dir/<PatientID>/<filename>``. Any ``/`` in the PatientID is
    replaced by ``_`` so the ID can be used as a single folder name.

    The original file name is kept, so two source files that share both a
    name and a PatientID map to the same destination path; the later copy
    replaces the earlier one.

    Args:
        root_dir: Directory tree to search for DICOM files.
        output_dir: Directory that receives the per-patient folders. Folders
            are created on demand, only when a file is actually copied.

    Returns:
        None. A failure on one file is printed and the walk continues.
    """
    wanted_modalities = ("CT", "RTSTRUCT")

    for path, _dirs, filenames in os.walk(root_dir):
        for filename in filenames:
            # Compare case-insensitively so ".DCM" exports are not skipped.
            if not filename.lower().endswith(".dcm"):
                continue

            filepath = os.path.join(path, filename)
            try:
                # Only header elements are inspected, so skip the pixel data
                # instead of loading every CT slice fully into memory.
                dcm = pydicom.dcmread(filepath, stop_before_pixels=True)

                # A file without a Modality is simply not one we want.
                if getattr(dcm, "Modality", None) not in wanted_modalities:
                    continue

                patient_id_ = str(dcm.PatientID).replace("/", "_")
                patient_dir = os.path.join(output_dir, patient_id_)

                os.makedirs(patient_dir, exist_ok=True)
                shutil.copy(filepath, os.path.join(patient_dir, filename))

            except Exception as e:
                # Best effort: report the problem file and keep going.
                print(f"Error: {e} - File: {filename}")

# Source tree to scan and destination that receives the per-patient folders.
root_dir = "D:\\Python\\DRAW DATA PREPARATION\\DRAW BREAST\\imgdata\\New folder\\DRAW BREAST"
output_dir = "D:\\Python\\DRAW DATA PREPARATION\\DRAW BREAST\\imgdata\\New folder\\DRAW ddis"
copy_dcm(root_dir, output_dir)

### Rename files to `patientid_filenumber`

In [None]:

def rename_files_in_folders(root_folder):
    """Rename the files of every sub-folder to ``<folder>_<n><ext>``.

    For each directory directly inside ``root_folder`` the regular files are
    taken in sorted name order and numbered from 1, keeping each file's
    extension. Sub-directories inside a folder and loose files directly in
    ``root_folder`` are left untouched.

    The rename runs in two phases (to a unique temporary name, then to the
    final name). Renaming straight onto a final name that another file in
    the same folder still occupies would replace that file on POSIX and
    raise on Windows, e.g. when the function is run a second time.

    Args:
        root_folder: Directory whose immediate sub-folders are processed.
    """
    import uuid

    for folder_name in sorted(os.listdir(root_folder)):
        folder_path = os.path.join(root_folder, folder_name)
        if not os.path.isdir(folder_path):
            continue

        # Sorted so the numbering is reproducible; os.listdir order is arbitrary.
        file_names = sorted(
            name
            for name in os.listdir(folder_path)
            if os.path.isfile(os.path.join(folder_path, name))
        )

        token = uuid.uuid4().hex
        staged = []

        # Phase 1: move every file that needs a new name out of the way.
        for file_count, file_name in enumerate(file_names, start=1):
            extension = os.path.splitext(file_name)[1]
            new_file_name = f"{folder_name}_{file_count}{extension}"
            if new_file_name == file_name:
                continue  # already carries its final name

            temp_path = os.path.join(
                folder_path, f".{token}_{file_count}{extension}"
            )
            os.rename(os.path.join(folder_path, file_name), temp_path)
            staged.append((temp_path, os.path.join(folder_path, new_file_name)))

        # Phase 2: every final name is free now, so no file can be clobbered.
        for temp_path, new_file_path in staged:
            os.rename(temp_path, new_file_path)


# Renumber the files inside every patient folder under output_dir.
rename_files_in_folders(root_folder=output_dir)

### Read structure names from RTSTRUCT files

In [3]:
# Parallel lists: rtstruct[i] is an ROI name, dcm_path[i] the folder it was read from.
rtstruct = []
dcm_path = []

# root_dir = os.path.join(os.getcwd(), "imgdata\\DRAW BREAST-V2 - Copy")
root_dir = "prostate_temp"
# root_dir = "E:\\Rectum DRAW data_processed"

# Walk the tree and record every ROI name of every RTSTRUCT file found.
for root, _dirs, filenames in os.walk(root_dir):
    for filename in filenames:
        if not filename.endswith(".dcm"):
            continue

        fpath = os.path.join(root, filename)
        try:
            # Only header elements are needed; skipping pixel data avoids
            # loading every CT slice just to discard it.
            ds = pydicom.dcmread(fpath, stop_before_pixels=True)
            if ds.Modality == "RTSTRUCT":
                for structure_set in ds.StructureSetROISequence:
                    rtstruct.append(structure_set.ROIName)
                    dcm_path.append(root)
                    # dcm_path.append(root.split("\\")[-1])
        except Exception:
            # Best effort: report the file and continue. `Exception` (not a
            # bare except) so Ctrl-C / SystemExit still stop the scan.
            print(f"dicom reading error - {fpath}")

In [5]:
# One row per (folder, ROI name) pair collected by the scan.
df = pd.DataFrame(data={"dcm_path": dcm_path, "rtstruct": rtstruct})

# df.to_csv("DRAW BREAST-V2 Rename-Rtstruct Structure Name.csv")
df.to_csv(path_or_buf="DDIS-Prostate Rtstruct Name.csv")

In [8]:
# Number of rows recorded for each distinct ROI name.
df1 = df.groupby(by="rtstruct").count()
df1

Unnamed: 0_level_0,dcm_path
rtstruct,Unnamed: 1_level_1
1UD 36,1
ANORECTUM,18
ANORECTUM NOS,30
ANORECTUM NOS1,30
ANORECTUM NOS1CM,6
...,...
Z_RectumOS60,1
Z_RectumOS62,26
bladder35,1
sigmoid,1


In [4]:
# Reference ROI names that the cells below look for in the collected data.
# Left/right pairs are kept on one line; the order is significant because it
# determines the row order of the combinations table built later.
rtstruct_names = [
    'Brain', 'Brainstem',
    'Cochlea_L', 'Cochlea_R',
    'Eye_L', 'Eye_R',
    'Glnd_Lacrimal_L', 'Glnd_Lacrimal_R',
    'Hippocampus_L', 'Hippocampus_R',
    'Lens_L', 'Lens_R',
    'Lobe_Temporal_L', 'Lobe_Temporal_R',
    'OpticChiasm',
    'OpticNrv_L', 'OpticNrv_R',
    'Pituitary',
    'Spinalcord',
]

# Rebuild the (folder, ROI name) table from the two collected lists.
df = pd.DataFrame(data={"dcm_path": dcm_path, "rtstruct": rtstruct})

In [13]:
# Occurrences of each ROI name as a two-column table (structure, count).
structure_count = (
    df["rtstruct"]
    .value_counts()
    .rename_axis("structure")
    .reset_index(name="count")
)
# Show only the names that appear in the reference list.
structure_count[structure_count["structure"].isin(rtstruct_names)]

Unnamed: 0,structure,count
1,Eye_L,70
2,OpticNrv_L,70
3,Lens_R,70
4,Lens_L,70
5,Eye_R,70
6,OpticNrv_R,70
7,Brainstem,70
8,OpticChiasm,70
9,Cochlea_R,69
11,Cochlea_L,69


### Find expected structures that are missing

In [57]:
# Every distinct dcm_path value paired with every reference ROI name.
unique_mr_numbers = df['dcm_path'].unique()

_pairs = []
for mr in unique_mr_numbers:
    for structure in rtstruct_names:
        _pairs.append((mr, structure))

all_combinations = pd.DataFrame(_pairs, columns=['dcm_path', 'rtstruct'])


all_combinations

Unnamed: 0,dcm_path,rtstruct
0,12_003030,Brain
1,12_003030,Brainstem
2,12_003030,Cochlea_L
3,12_003030,Cochlea_R
4,12_003030,Eye_L
...,...,...
1382,23_021309,OpticChiasm
1383,23_021309,OpticNrv_L
1384,23_021309,OpticNrv_R
1385,23_021309,Pituitary


In [76]:
# Left-join the full grid onto the observed rows; pairs that only exist on the
# left side are the expected structures that were never observed.
_merged = all_combinations.merge(
    df, on=['dcm_path', 'rtstruct'], how='left', indicator=True
)
missing_data = _merged[_merged['_merge'] == "left_only"].drop(columns='_merge')

missing_data

Unnamed: 0,dcm_path,rtstruct
6,12_003030,Glnd_Lacrimal_L
7,12_003030,Glnd_Lacrimal_R
17,12_003030,Pituitary
18,12_003030,Spinalcord
21,12_003856,Cochlea_L
...,...,...
1356,23_020645,Glnd_Lacrimal_R
1374,23_021309,Glnd_Lacrimal_L
1375,23_021309,Glnd_Lacrimal_R
1385,23_021309,Pituitary


In [82]:
# Save the missing (dcm_path, structure) pairs without the row index.
missing_data.to_csv(path_or_buf="CNS_V1_Missing_Structure.csv", index=False)

In [5]:
# Save the current (dcm_path, rtstruct) table, row index included.
df.to_csv(path_or_buf="RECTUM Rtstruct harmonize.csv")