In [1]:
import pydicom

In [6]:
import os

# Path to the directory containing DICOM files
dicom_dir = r"D:\personal\scans\mri\Images"

# Collect the full path of every regular file in the directory.
# - Sub-directories are skipped: they cannot be read as DICOM files.
# - The listing is sorted because os.listdir() returns entries in arbitrary
#   order, which would make dicom_files[0] differ between runs or machines.
dicom_files = sorted(
    path
    for path in (os.path.join(dicom_dir, name) for name in os.listdir(dicom_dir))
    if os.path.isfile(path)
)



In [7]:
# Read the first file of the listing as a sample.
if not dicom_files:
    # Fail with an explanation instead of an opaque IndexError on dicom_files[0].
    raise FileNotFoundError(f"No files found in {dicom_dir}")
dicom_data = pydicom.dcmread(dicom_files[0])

# Display basic information about the DICOM file.
# Dataset.get() is used so that a file lacking one of these attributes prints
# "N/A" instead of aborting the cell with AttributeError.
# NOTE(review): this prints the real patient name into the saved notebook
# output; clear the output before sharing the notebook.
print("\nDICOM file information:")
print(f"Patient Name: {dicom_data.get('PatientName', 'N/A')}")
print(f"Modality: {dicom_data.get('Modality', 'N/A')}")
print(f"Study Date: {dicom_data.get('StudyDate', 'N/A')}")
print(f"Image Size: {dicom_data.get('Rows', 'N/A')}x{dicom_data.get('Columns', 'N/A')}")



DICOM file information:
Patient Name: KELLY^ALEX^PAUL^MR
Modality: MR
Study Date: 20240901
Image Size: 160x160


In [10]:
# Bare last expression: renders the whole dataset (file meta block followed by
# the data elements) as the cell output.
dicom_data


Dataset.file_meta -------------------------------
(0002,0000) File Meta Information Group Length  UL: 208
(0002,0001) File Meta Information Version       OB: b'\x00\x01'
(0002,0002) Media Storage SOP Class UID         UI: MR Image Storage
(0002,0003) Media Storage SOP Instance UID      UI: 1.3.12.2.1107.5.2.41.169554.2024090118105196091577557
(0002,0010) Transfer Syntax UID                 UI: JPEG Lossless, Non-Hierarchical, First-Order Prediction (Process 14 [Selection Value 1])
(0002,0012) Implementation Class UID            UI: 1.3.6.1.4.1.30071.8
(0002,0013) Implementation Version Name         SH: 'fo-dicom 4.0.8'
(0002,0016) Source Application Entity Title     AE: 'RHQEIPROD'
-------------------------------------------------
(0008,0005) Specific Character Set              CS: 'ISO_IR 100'
(0008,0008) Image Type                          CS: ['ORIGINAL', 'PRIMARY', 'M', 'NORM', 'DIS2D']
(0008,0012) Instance Creation Date              DA: '20240901'
(0008,0013) Instance Creation Tim

In [23]:
def person_names_callback(ds, elem):
    """Blank out a person-name element; intended as a ``Dataset.walk`` callback.

    Args:
        ds: Dataset handed over by the walker. Not used here, but part of the
            two-argument callback signature.
        elem: Data element being visited. It is modified in place.

    Returns:
        None. When ``elem.VR`` is ``"PN"`` its value is overwritten with the
        string ``"anonymous"``; every other element is left untouched.
    """
    if elem.VR != "PN":
        return
    elem.value = "anonymous"

In [24]:
# Run person_names_callback over the elements of dicom_data. This mutates
# dicom_data in place (the callback overwrites the value of each PN element it
# visits), so any earlier cell output showing dicom_data is stale afterwards.
dicom_data.walk(person_names_callback)

In [31]:
# Display the dataset again to inspect it after the anonymisation pass.
dicom_data


Dataset.file_meta -------------------------------
(0002,0000) File Meta Information Group Length  UL: 208
(0002,0001) File Meta Information Version       OB: b'\x00\x01'
(0002,0002) Media Storage SOP Class UID         UI: MR Image Storage
(0002,0003) Media Storage SOP Instance UID      UI: 1.3.12.2.1107.5.2.41.169554.2024090118105196091577557
(0002,0010) Transfer Syntax UID                 UI: JPEG Lossless, Non-Hierarchical, First-Order Prediction (Process 14 [Selection Value 1])
(0002,0012) Implementation Class UID            UI: 1.3.6.1.4.1.30071.8
(0002,0013) Implementation Version Name         SH: 'fo-dicom 4.0.8'
(0002,0016) Source Application Entity Title     AE: 'RHQEIPROD'
-------------------------------------------------
(0008,0005) Specific Character Set              CS: 'ISO_IR 100'
(0008,0008) Image Type                          CS: ['ORIGINAL', 'PRIMARY', 'M', 'NORM', 'DIS2D']
(0008,0012) Instance Creation Date              DA: '20240901'
(0008,0013) Instance Creation Tim

In [33]:
# NOTE(review): in the visible notebook, metadata_ds is only assigned inside the
# batch-processing loop of a later cell, so on Restart & Run All this cell would
# raise NameError. It displays the most recent value that loop assigned;
# presumably this cell belongs after the loop - confirm and move it.
metadata_ds

Dataset.file_meta -------------------------------
(0002,0000) File Meta Information Group Length  UL: 208
(0002,0001) File Meta Information Version       OB: b'\x00\x01'
(0002,0002) Media Storage SOP Class UID         UI: MR Image Storage
(0002,0003) Media Storage SOP Instance UID      UI: 1.3.12.2.1107.5.2.41.169554.2024090118552057254992870
(0002,0010) Transfer Syntax UID                 UI: JPEG Lossless, Non-Hierarchical, First-Order Prediction (Process 14 [Selection Value 1])
(0002,0012) Implementation Class UID            UI: 1.3.6.1.4.1.30071.8
(0002,0013) Implementation Version Name         SH: 'fo-dicom 4.0.8'
(0002,0016) Source Application Entity Title     AE: 'RHQEIPROD'
-------------------------------------------------
(0008,0005) Specific Character Set              CS: 'ISO_IR 100'
(0008,0008) Image Type                          CS: ['ORIGINAL', 'PRIMARY', 'M', 'NORM', 'DIS2D']
(0008,0012) Instance Creation Date              DA: '20240901'
(0008,0013) Instance Creation Tim

In [None]:
# Collect the metadata of every DICOM file as JSON-ready dictionaries (one per
# file). Dictionaries are stored rather than the strings returned by to_json():
# dumping a list of JSON strings with json.dump would encode them a second
# time and produce an array of escaped strings instead of an array of objects.
all_metadata = []

# Loop-invariant: the tag of the element that must not be exported.
pixel_data_tag = pydicom.tag.Tag('PixelData')

# Loop through all DICOM files
for file_path in dicom_files:
    try:
        # Read the DICOM file
        ds = pydicom.dcmread(file_path)

        # Anonymize personal data in place, before any element is copied out
        ds.walk(person_names_callback)

        # Build a new dataset holding only the metadata (everything except PixelData)
        metadata_ds = pydicom.Dataset()
        for elem in ds:
            if elem.tag != pixel_data_tag:
                metadata_ds[elem.tag] = elem

        # Add the dictionary form to our list
        all_metadata.append(metadata_ds.to_json_dict())

        print(f"Processed: {file_path}")
    except Exception as e:
        # Best effort: report the file that failed and continue with the rest.
        print(f"Error processing {file_path}: {e}")




Processed: D:\personal\scans\mri\Images\1.3.12.2.1107.5.2.41.169554.2024090118105196091577557.DCM
Processed: D:\personal\scans\mri\Images\1.3.12.2.1107.5.2.41.169554.2024090118105199581277588.DCM
Processed: D:\personal\scans\mri\Images\1.3.12.2.1107.5.2.41.169554.2024090118105199657077592.DCM
Processed: D:\personal\scans\mri\Images\1.3.12.2.1107.5.2.41.169554.2024090118105210045577776.DCM
Processed: D:\personal\scans\mri\Images\1.3.12.2.1107.5.2.41.169554.2024090118105210117177780.DCM
Processed: D:\personal\scans\mri\Images\1.3.12.2.1107.5.2.41.169554.2024090118105211274777802.DCM
Processed: D:\personal\scans\mri\Images\1.3.12.2.1107.5.2.41.169554.2024090118105213187477815.DCM
Processed: D:\personal\scans\mri\Images\1.3.12.2.1107.5.2.41.169554.202409011810521562077617.DCM
Processed: D:\personal\scans\mri\Images\1.3.12.2.1107.5.2.41.169554.2024090118105215900477846.DCM
Processed: D:\personal\scans\mri\Images\1.3.12.2.1107.5.2.41.169554.2024090118105215968677850.DCM
Processed: D:\persona

In [37]:
# Write all metadata to a JSON file
import json
import os

# Create the output directory. exist_ok=True makes this a no-op when the
# directory is already there, replacing the separate exists() check.
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)

# Define the output file path
output_file = os.path.join(output_dir, "dicom_metadata.json")

# Write the collected metadata to the file. The encoding is stated explicitly
# so the result does not depend on the platform's default text encoding.
with open(output_file, 'w', encoding="utf-8") as f:
    json.dump(all_metadata, f, indent=4)

print(f"Metadata saved to {output_file}")


Metadata saved to output\dicom_metadata.json
