In [2]:
# Alex Weston
# Digital Innovation Lab, Mayo Clinic

import os
import pydicom # pydicom is using the gdcm package for decompression

def clean_text(string):
    # clean and standardize text descriptions, which makes searching files easier
    forbidden_symbols = ["*", ".", ",", "\"", "\\", "/", "|", "[", "]", ":", ";", " "]
    for symbol in forbidden_symbols:
        string = string.replace(symbol, "_") # replace everything with an underscore
    return string.lower()  
   
# user specified parameters ----- HIER ABSOLUUT PAD INVULLEN, vergeet de dubbele slashes niet
src = "C:\\Users\\jerutten\\OneDrive - UGent\\Desktop\\QCC09042025\\A"
dst = "C:\\Users\\jerutten\\OneDrive - UGent\\Desktop\\QQCC09042025\\sorted"
print('reading file list...')
unsortedList = []
for root, dirs, files in os.walk(src):
    for file in files: 
        #if ".dcm" in file:# exclude non-dicoms, good for messy folders
            unsortedList.append(os.path.join(root, file))

print('%s files found.' % len(unsortedList))
       
for dicom_loc in unsortedList:
    # read the file
    ds = pydicom.dcmread(dicom_loc, force=True)
   
    # get patient, study, and series information
    patientID = clean_text(ds.get("PatientID", "NA"))
    studyDate = clean_text(ds.get("StudyDate", "NA"))
    studyDescription = clean_text(ds.get("StudyDescription", "NA"))
    seriesDescription = clean_text(ds.get("SeriesDescription", "NA"))
    sliceThickness = ds.get("SliceThickness", "NA")
    convKernel = ds.get("ConvolutionKernel", "NA")
    KVP = ds.get("KVP","NA")
    
    # generate new, standardized file name
    modality = ds.get("Modality","NA")
    studyInstanceUID = ds.get("StudyInstanceUID","NA")
    seriesInstanceUID = ds.get("SeriesInstanceUID","NA")
    instanceNumber = str(ds.get("InstanceNumber","0"))
    fileName = modality + "." + instanceNumber + str(KVP) + "_" + str(sliceThickness) + "_" + convKernel + ".dcm"
       
    # uncompress files (using the gdcm package)
    try:
        ds.decompress()
    except:
        print('an instance in file %s - %s - %s - %s" could not be decompressed. exiting.' % (patientID, studyDate, studyDescription, seriesDescription ))
   
    # save files to a 4-tier nested folder structure
    if not os.path.exists(os.path.join(dst, patientID)):
        os.makedirs(os.path.join(dst, patientID))
   
    if not os.path.exists(os.path.join(dst, patientID, studyDate, str(KVP))):
        os.makedirs(os.path.join(dst, patientID, studyDate, str(KVP)))
       
    if not os.path.exists(os.path.join(dst, patientID, studyDate, str(KVP), str(sliceThickness))):
        os.makedirs(os.path.join(dst, patientID, studyDate, str(KVP), str(sliceThickness)))
       
    if not os.path.exists(os.path.join(dst, patientID, studyDate, str(KVP), str(sliceThickness), convKernel)):
        os.makedirs(os.path.join(dst, patientID, studyDate, str(KVP), str(sliceThickness), convKernel))
        print('Saving out file: %s - %s - %s - %s.' % (patientID, str(KVP), str(sliceThickness), convKernel))
       
    ds.save_as(os.path.join(dst, patientID, studyDate, str(KVP), str(sliceThickness), convKernel, fileName))

print('done.')

reading file list...
580 files found.
an instance in file qcciq - 20250409 - abdomen - scout_face___profil" could not be decompressed. exiting.
Saving out file: qcciq - 100 - 321.545471 - STANDARD.
an instance in file qcciq - 20250409 - abdomen - scout_face___profil" could not be decompressed. exiting.
an instance in file qcciq - 20250409 - abdomen - dose_report" could not be decompressed. exiting.
Saving out file: qcciq - NA - NA - NA.
an instance in file qcciq - 20250409 - abdomen - " could not be decompressed. exiting.
Saving out file: qcciq - 120 - 2.500000 - SOFT.
an instance in file qcciq - 20250409 - abdomen - " could not be decompressed. exiting.
an instance in file qcciq - 20250409 - abdomen - " could not be decompressed. exiting.
an instance in file qcciq - 20250409 - abdomen - " could not be decompressed. exiting.
an instance in file qcciq - 20250409 - abdomen - " could not be decompressed. exiting.
an instance in file qcciq - 20250409 - abdomen - " could not be decompressed

Collecting pydicom
  Downloading pydicom-3.0.1-py3-none-any.whl.metadata (9.4 kB)
Downloading pydicom-3.0.1-py3-none-any.whl (2.4 MB)
   ---------------------------------------- 0.0/2.4 MB ? eta -:--:--
   ---------------------------------------- 2.4/2.4 MB 47.0 MB/s eta 0:00:00
Installing collected packages: pydicom
Successfully installed pydicom-3.0.1
Note: you may need to restart the kernel to use updated packages.
