# LESSON 5: DICOM Metadata Deep Dive
## Biomedical Image Processing - DICOM Module

In this lesson:
- Understanding DICOM tag structure
- Reading and modifying metadata
- Working with private tags
- Extracting acquisition parameters

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pydicom
from pydicom.dataset import Dataset, FileMetaDataset
from pydicom.uid import generate_uid
from pydicom.tag import Tag
from pydicom.datadict import keyword_for_tag, tag_for_keyword
import os

# Confirmation message: this line is only reached if every import above succeeded.
print("Libraries imported successfully!")

## 1. DICOM Tag Structure

Every DICOM attribute is identified by a **tag** consisting of:
- **Group number** (2 bytes / 4 hex digits)
- **Element number** (2 bytes / 4 hex digits)

### Tag Format: (GGGG,EEEE)

| Group | Description |
|-------|-------------|
| 0008 | Study/Series information |
| 0010 | Patient information |
| 0018 | Acquisition parameters |
| 0020 | Relationship (position, orientation) |
| 0028 | Image pixel description |
| 7FE0 | Pixel data |

### Value Representation (VR)
Each tag has a VR that defines the data type:
- **PN**: Person Name
- **DA**: Date (YYYYMMDD)
- **TM**: Time (HHMMSS, optionally with fractional seconds: HHMMSS.FFFFFF)
- **LO**: Long String
- **DS**: Decimal String
- **IS**: Integer String
- **US**: Unsigned Short
- **OW**: Other Word (pixel data)

In [None]:
# Create a sample DICOM for demonstration
def create_sample_dicom():
    """Build an in-memory CT image dataset populated with sample metadata.

    The dataset carries file-meta information, patient / study / series
    attributes, CT acquisition parameters, image description attributes,
    position attributes and a block of synthetic 16-bit pixel data whose
    shape follows the Rows / Columns attributes.

    Returns:
        pydicom.dataset.Dataset: a new dataset. UIDs are generated on every
        call, so two calls never return identical datasets.

    Side effects:
        Reseeds NumPy's global random generator with 42 so that the
        synthetic pixel values are reproducible.
    """
    file_meta = FileMetaDataset()
    file_meta.MediaStorageSOPClassUID = pydicom.uid.CTImageStorage
    file_meta.MediaStorageSOPInstanceUID = generate_uid()
    file_meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian
    file_meta.ImplementationClassUID = generate_uid()
    
    ds = Dataset()
    ds.file_meta = file_meta
    # NOTE(review): these two attributes are reported as deprecated in
    # pydicom 3.0 - confirm against the pydicom version this lesson targets.
    ds.is_little_endian = True
    ds.is_implicit_VR = False
    ds.preamble = b'\x00' * 128
    
    # Patient Information (Group 0010)
    ds.PatientName = "Doe^John^Mr"
    ds.PatientID = "PAT12345"
    ds.PatientBirthDate = "19800515"
    ds.PatientSex = "M"
    # Age at StudyDate: 1980-05-15 -> 2024-01-20 is 43 completed years
    ds.PatientAge = "043Y"
    ds.PatientWeight = 75.5
    
    # Study Information (Group 0008)
    ds.StudyDate = "20240120"
    ds.StudyTime = "143052"
    ds.StudyDescription = "CT Chest with Contrast"
    ds.StudyInstanceUID = generate_uid()
    ds.AccessionNumber = "ACC001"
    ds.ReferringPhysicianName = "Smith^Jane^Dr"
    ds.InstitutionName = "City Hospital"
    ds.StationName = "CT_SCANNER_01"
    ds.Manufacturer = "Medical Imaging Co."
    ds.ManufacturerModelName = "UltraCT 5000"
    
    # Series Information
    ds.Modality = "CT"
    ds.SeriesDescription = "Axial 2.5mm"
    ds.SeriesInstanceUID = generate_uid()
    ds.SeriesNumber = 3
    ds.BodyPartExamined = "CHEST"
    
    # Acquisition Parameters (Group 0018)
    ds.KVP = 120  # Tube voltage
    ds.XRayTubeCurrent = 250  # mA
    ds.ExposureTime = 500  # ms
    ds.Exposure = 125  # mAs (250 mA x 0.5 s)
    ds.ConvolutionKernel = "STANDARD"
    ds.SliceThickness = 2.5
    ds.SpacingBetweenSlices = 2.5
    ds.DataCollectionDiameter = 500
    ds.ReconstructionDiameter = 350
    ds.GantryDetectorTilt = 0
    ds.TableHeight = 150
    ds.RotationDirection = "CW"
    ds.ContrastBolusAgent = "Omnipaque 350"
    ds.ContrastBolusVolume = 100  # mL
    
    # Image Information (Group 0028)
    ds.Rows = 512
    ds.Columns = 512
    ds.BitsAllocated = 16
    ds.BitsStored = 12
    ds.HighBit = 11
    ds.PixelRepresentation = 1  # signed pixel values (matches the int16 data below)
    ds.SamplesPerPixel = 1
    ds.PhotometricInterpretation = "MONOCHROME2"
    ds.PixelSpacing = [0.684, 0.684]
    ds.RescaleIntercept = -1024
    ds.RescaleSlope = 1
    # Two window presets; centers, widths and explanations are index-aligned
    ds.WindowCenter = [40, -500]
    ds.WindowWidth = [400, 1500]
    ds.WindowCenterWidthExplanation = ["SOFT_TISSUE", "LUNG"]
    
    # SOP identification (Group 0008) - must agree with the file meta header
    ds.SOPClassUID = pydicom.uid.CTImageStorage
    ds.SOPInstanceUID = file_meta.MediaStorageSOPInstanceUID

    # Instance / position information (Group 0020)
    ds.InstanceNumber = 42
    ds.ImagePositionPatient = [-175.0, -175.0, 105.0]
    ds.ImageOrientationPatient = [1, 0, 0, 0, 1, 0]
    ds.SliceLocation = 105.0
    ds.FrameOfReferenceUID = generate_uid()
    
    # Create pixel data: reproducible random values, shaped from the
    # Rows/Columns attributes so the two can never drift apart
    np.random.seed(42)
    pixel_array = np.random.randint(
        900, 1100, (ds.Rows, ds.Columns), dtype=np.int16
    )
    # Serialize explicitly as little-endian to match the transfer syntax,
    # independent of the byte order of the machine running the lesson
    ds.PixelData = pixel_array.astype('<i2').tobytes()
    
    return ds

# Build the shared sample dataset; the demonstration code that follows reads from `ds`.
ds = create_sample_dicom()
print("Sample DICOM created with rich metadata!")

## 2. Exploring Tags

In [None]:
# Print every element of the dataset as one row of a fixed-width table
print("ALL DICOM TAGS", "=" * 70, sep="\n")
print(f"{'Tag':<15} {'VR':<5} {'Keyword':<35} {'Value'}")
print("-" * 70)

for elem in ds:
    if elem.tag.group == 0x7FE0:
        continue  # pixel data group: skipped, the raw bytes are not listed

    tag_str = "({:04X},{:04X})".format(elem.tag.group, elem.tag.element)
    vr, keyword = elem.VR, elem.keyword
    value = str(elem.value)
    # Keep the value column readable: cap at 30 characters including "..."
    value = value if len(value) <= 30 else value[:27] + "..."
    print(f"{tag_str:<15} {vr:<5} {keyword:<35} {value}")

In [None]:
# Access the same element (PatientName, tag 0010,0010) in several ways
print("ACCESSING TAGS - Different Methods")
print("=" * 50)

# Method 1: By keyword (most common)
print("\n1. By keyword:")
print(f"   ds.PatientName = {ds.PatientName}")

# Method 2: By (group, element) tuple - returns the element, hence .value
print("\n2. By tag tuple:")
print(f"   ds[0x0010, 0x0010] = {ds[0x0010, 0x0010].value}")

# Method 3: By Tag object
print("\n3. By Tag object:")
tag = Tag(0x0010, 0x0010)
print(f"   ds[Tag(0x0010, 0x0010)] = {ds[tag].value}")

# Method 4: Finding the tag number that belongs to a keyword.
# The lookup yields a plain integer, so it is shown in hex and wrapped in
# Tag() to display the familiar (GGGG,EEEE) form instead of a decimal number.
print("\n4. Finding tag from keyword:")
patient_name_tag = tag_for_keyword('PatientName')
print(f"   tag_for_keyword('PatientName') = {patient_name_tag:#010x} -> {Tag(patient_name_tag)}")

# Method 5: Finding keyword from tag
print("\n5. Finding keyword from tag:")
keyword = keyword_for_tag(0x00100010)
print(f"   keyword_for_tag(0x00100010) = {keyword}")

## 3. Extracting Specific Information Groups

In [None]:
def extract_patient_info(ds):
    """Collect the patient attributes present on *ds* as display strings.

    Args:
        ds: object exposing DICOM keywords as attributes (read via getattr).

    Returns:
        dict mapping a human-readable label to ``str(value)`` for every
        patient attribute that is present and not None, in a fixed order.
    """
    labelled_keywords = (
        ('PatientName', 'Name'),
        ('PatientID', 'ID'),
        ('PatientBirthDate', 'Birth Date'),
        ('PatientSex', 'Sex'),
        ('PatientAge', 'Age'),
        ('PatientWeight', 'Weight (kg)'),
    )

    # Look every keyword up once, then keep only the ones that have a value
    found = (
        (label, getattr(ds, keyword, None))
        for keyword, label in labelled_keywords
    )
    return {label: str(raw) for label, raw in found if raw is not None}

# Run the extractor on the sample dataset and list one field per line
patient_info = extract_patient_info(ds)
print("PATIENT INFORMATION", "=" * 40, sep="\n")
for key, value in patient_info.items():
    print("  {}: {}".format(key, value))

In [None]:
def extract_acquisition_params(ds):
    """Collect the CT acquisition attributes present on *ds*.

    Args:
        ds: object exposing DICOM keywords as attributes (read via getattr).

    Returns:
        dict mapping a labelled parameter name (with unit) to the raw
        attribute value, for every attribute that is present and not None.
        Values are returned unconverted.
    """
    labelled_keywords = (
        ('KVP', 'Tube Voltage (kVp)'),
        ('XRayTubeCurrent', 'Tube Current (mA)'),
        ('ExposureTime', 'Exposure Time (ms)'),
        ('Exposure', 'Exposure (mAs)'),
        ('SliceThickness', 'Slice Thickness (mm)'),
        ('ConvolutionKernel', 'Reconstruction Kernel'),
        ('ReconstructionDiameter', 'FOV (mm)'),
        ('GantryDetectorTilt', 'Gantry Tilt (deg)'),
        ('ContrastBolusAgent', 'Contrast Agent'),
        ('ContrastBolusVolume', 'Contrast Volume (mL)'),
    )

    # Compare against None (not truthiness) so that values such as a
    # gantry tilt of 0 are still reported
    found = (
        (label, getattr(ds, keyword, None))
        for keyword, label in labelled_keywords
    )
    return {label: raw for label, raw in found if raw is not None}

# Run the extractor on the sample dataset and list one parameter per line
acq_params = extract_acquisition_params(ds)
print("\nACQUISITION PARAMETERS", "=" * 40, sep="\n")
for key, value in acq_params.items():
    print("  {}: {}".format(key, value))

In [None]:
def extract_image_params(ds):
    """Extract image-related parameters.

    Args:
        ds: object exposing DICOM keywords as attributes (read via getattr).

    Returns:
        dict with the keys 'Rows', 'Columns', 'Bits Allocated',
        'Bits Stored', 'Rescale Slope', 'Rescale Intercept',
        'Slice Location (mm)' and 'Instance Number' (None when the attribute
        is absent), plus, only when the data is available:
          * 'Pixel Spacing (mm)' - formatted "row x col" string
          * 'Window Presets'     - list of (center, width) pairs when both
                                   window attributes are multi-valued
          * 'Window (C/W)'       - "center / width" string otherwise
    """
    # Local import: keeps the block self-contained and avoids clashing with
    # any other name called Sequence at module level.
    from collections.abc import Sequence as _AbcSequence

    def has_value(value):
        """Return True unless the value is missing or empty."""
        if value is None:
            return False
        try:
            return len(value) > 0  # '' and zero-length multi-values are empty
        except TypeError:
            return True  # plain numbers have no len(); 0 is a real value

    def is_multi_valued(value):
        """Return True for list-like values (strings are single values)."""
        return isinstance(value, _AbcSequence) and not isinstance(value, str)

    params = {}
    
    # Basic dimensions
    params['Rows'] = getattr(ds, 'Rows', None)
    params['Columns'] = getattr(ds, 'Columns', None)
    
    # Pixel spacing
    ps = getattr(ds, 'PixelSpacing', None)
    if ps:
        params['Pixel Spacing (mm)'] = f"{ps[0]:.3f} x {ps[1]:.3f}"
    
    # Bit depth
    params['Bits Allocated'] = getattr(ds, 'BitsAllocated', None)
    params['Bits Stored'] = getattr(ds, 'BitsStored', None)
    
    # Rescale
    params['Rescale Slope'] = getattr(ds, 'RescaleSlope', None)
    params['Rescale Intercept'] = getattr(ds, 'RescaleIntercept', None)
    
    # Window settings. A truthiness test would silently drop a window
    # center of 0, which is a legitimate value, so test for presence instead.
    wc = getattr(ds, 'WindowCenter', None)
    ww = getattr(ds, 'WindowWidth', None)
    if has_value(wc) and has_value(ww):
        if is_multi_valued(wc) and is_multi_valued(ww):
            # Several presets: pair each center with its width
            params['Window Presets'] = list(zip(wc, ww))
        else:
            params['Window (C/W)'] = f"{wc} / {ww}"
    
    # Position
    params['Slice Location (mm)'] = getattr(ds, 'SliceLocation', None)
    params['Instance Number'] = getattr(ds, 'InstanceNumber', None)
    
    return params

# Run the extractor on the sample dataset and list one parameter per line
img_params = extract_image_params(ds)
print("\nIMAGE PARAMETERS", "=" * 40, sep="\n")
for key, value in img_params.items():
    print("  {}: {}".format(key, value))

## 4. Modifying Metadata

In [None]:
# Build a second, independent sample dataset to edit. This is a fresh
# create_sample_dicom() call rather than a copy of `ds`, so `ds` is untouched.
ds_modified = create_sample_dicom()

print("BEFORE MODIFICATION:")
print("  PatientName: {}".format(ds_modified.PatientName))
print("  StudyDescription: {}".format(ds_modified.StudyDescription))
print("  WindowCenter: {}".format(ds_modified.WindowCenter))

# Assigning to a keyword that is already present overwrites its value
ds_modified.PatientName = "Anonymous^Patient"
ds_modified.StudyDescription = "Modified Study Description"
ds_modified.WindowCenter, ds_modified.WindowWidth = 50, 350

print("\nAFTER MODIFICATION:")
print("  PatientName: {}".format(ds_modified.PatientName))
print("  StudyDescription: {}".format(ds_modified.StudyDescription))
print("  WindowCenter: {}".format(ds_modified.WindowCenter))

In [None]:
# Adding new tags
from pydicom.dataelem import DataElement

# Method 1: plain attribute assignment (the keyword must be a known one)
ds_modified.InstitutionalDepartmentName = "Radiology"

# Method 2: build the element explicitly from tag, VR and value, then add it
# (usable for any tag). Tag (0008,1030) is StudyDescription; the print below
# shows the value the dataset reports for it after the add() call.
new_elem = DataElement(Tag(0x0008, 0x1030), 'LO', 'New Description via DataElement')
ds_modified.add(new_elem)

print("Added new tags:")
print("  InstitutionalDepartmentName: {}".format(ds_modified.InstitutionalDepartmentName))
print("  StudyDescription: {}".format(ds_modified.StudyDescription))

In [None]:
# Deleting tags
print("BEFORE DELETION:")
print(f"  Has PatientWeight: {hasattr(ds_modified, 'PatientWeight')}")
# Read with a default instead of direct attribute access: when this cell is
# run a second time the tag is already gone and direct access would raise
# AttributeError.
print(f"  PatientWeight: {getattr(ds_modified, 'PatientWeight', 'N/A')}")

# Delete a tag (guarded, so deleting twice is harmless)
if hasattr(ds_modified, 'PatientWeight'):
    del ds_modified.PatientWeight

print("\nAFTER DELETION:")
print(f"  Has PatientWeight: {hasattr(ds_modified, 'PatientWeight')}")

## 5. Working with Sequences

In [None]:
# Sequences are nested datasets (like arrays of objects)
# Example used here: Anatomic Region Sequence (a coded description of the body region)

from pydicom.sequence import Sequence

# Create a sequence item: a coded concept is the triplet
# (code value, coding scheme, human-readable meaning)
code_item = Dataset()
# T-D3000 is the SNOMED-RT code for "Chest" (T-D4000 would be "Abdomen"),
# so the code value and the meaning below describe the same region
code_item.CodeValue = "T-D3000"
code_item.CodingSchemeDesignator = "SRT"
code_item.CodeMeaning = "Chest"

# Add sequence to dataset (a Sequence is a list of Dataset items)
ds_modified.AnatomicRegionSequence = Sequence([code_item])

print("SEQUENCE EXAMPLE - Anatomic Region")
print("=" * 40)
# Each item is itself a dataset and can be iterated element by element
for i, item in enumerate(ds_modified.AnatomicRegionSequence):
    print(f"\nItem {i}:")
    for elem in item:
        print(f"  {elem.keyword}: {elem.value}")

In [None]:
# A sequence with more than one item: two procedure codes from a local
# coding scheme, stored together as the Procedure Code Sequence
proc_item1 = Dataset()
proc_item1.CodeValue, proc_item1.CodeMeaning = "CT001", "CT Chest with Contrast"
proc_item1.CodingSchemeDesignator = "LOCAL"

proc_item2 = Dataset()
proc_item2.CodeValue, proc_item2.CodeMeaning = "CT002", "CT Chest High Resolution"
proc_item2.CodingSchemeDesignator = "LOCAL"

ds_modified.ProcedureCodeSequence = Sequence([proc_item1, proc_item2])

print("PROCEDURE CODE SEQUENCE", "=" * 40, sep="\n")
for i, item in enumerate(ds_modified.ProcedureCodeSequence):
    # One print per item; procedures are numbered from 1 for display
    print(f"\nProcedure {i + 1}:\n  Code: {item.CodeValue}\n  Meaning: {item.CodeMeaning}")

## 6. Date and Time Handling

In [None]:
from datetime import datetime, date, time

def parse_dicom_date(date_str):
    """Convert a DICOM date string (YYYYMMDD) into a ``datetime.date``.

    Only the first eight characters are parsed; anything after them is
    ignored. Returns None for an empty/None value or a string shorter than
    eight characters. A malformed eight-character prefix raises ValueError
    (from ``strptime``).
    """
    if not date_str or len(date_str) < 8:
        return None

    parsed = datetime.strptime(date_str[:8], '%Y%m%d')
    return parsed.date()

def parse_dicom_time(time_str):
    """Parse a DICOM time (HH, HHMM, HHMMSS or HHMMSS.ffffff) to Python time.

    Args:
        time_str: the TM value as a string; may be empty or None.

    Returns:
        ``datetime.time`` with whole-second resolution (a fractional part is
        discarded), or None when the value is empty or too short to contain
        an hour.

    Raises:
        ValueError: from ``strptime`` when the digits are not a valid time.
    """
    if not time_str:
        return None

    # Values may be space padded; the fraction after '.' is not kept
    digits = time_str.strip().split('.')[0]

    # Try the most precise layout the available digits allow. The hour-only
    # form is a valid TM value and used to fall through to None.
    for length, layout in ((6, '%H%M%S'), (4, '%H%M'), (2, '%H')):
        if len(digits) >= length:
            return datetime.strptime(digits[:length], layout).time()
    return None

def parse_dicom_datetime(ds):
    """Extract and parse date/time from DICOM.

    Args:
        ds: object exposing StudyDate, StudyTime and PatientBirthDate as
            attributes (each optional; read via getattr).

    Returns:
        dict containing, when the underlying values are available:
          * 'Study Date'      - 'YYYY-MM-DD' string
          * 'Study Time'      - 'HH:MM:SS' string
          * 'Study DateTime'  - combined ``datetime`` object
          * 'Birth Date'      - 'YYYY-MM-DD' string
          * 'Calculated Age'  - '<n> years', completed years at study date
    """
    result = {}
    
    # Study date/time
    study_date = parse_dicom_date(getattr(ds, 'StudyDate', ''))
    study_time = parse_dicom_time(getattr(ds, 'StudyTime', ''))
    if study_date:
        result['Study Date'] = study_date.strftime('%Y-%m-%d')
    if study_time:
        result['Study Time'] = study_time.strftime('%H:%M:%S')
    if study_date and study_time:
        result['Study DateTime'] = datetime.combine(study_date, study_time)
    
    # Patient birth date
    birth_date = parse_dicom_date(getattr(ds, 'PatientBirthDate', ''))
    if birth_date:
        result['Birth Date'] = birth_date.strftime('%Y-%m-%d')
        # Calculate age in completed calendar years. Dividing the day count
        # by 365 ignores leap days and reports a birthday a few days early
        # (e.g. 1980-05-15 -> 2024-05-10 would give 44 instead of 43).
        if study_date:
            birthday_reached = (
                (study_date.month, study_date.day)
                >= (birth_date.month, birth_date.day)
            )
            age = study_date.year - birth_date.year
            if not birthday_reached:
                age -= 1
            result['Calculated Age'] = f"{age} years"
    
    return result

# Parse the sample dataset's date/time attributes and list the results
datetime_info = parse_dicom_datetime(ds)
print("DATE/TIME INFORMATION", "=" * 40, sep="\n")
for key, value in datetime_info.items():
    print("  {}: {}".format(key, value))

## 7. Searching for Tags

In [None]:
def search_tags(ds, keyword_pattern):
    """Find the elements of *ds* whose keyword contains *keyword_pattern*.

    The comparison is a case-insensitive substring match. Only the elements
    yielded by iterating *ds* itself are examined.

    Returns:
        list of dicts with the keys 'tag' (formatted "(GGGG,EEEE)"),
        'keyword', 'vr' and 'value', in iteration order.
    """
    needle = keyword_pattern.lower()

    def describe(elem):
        # Summarize one matching element as a plain dict
        return {
            'tag': "({:04X},{:04X})".format(elem.tag.group, elem.tag.element),
            'keyword': elem.keyword,
            'vr': elem.VR,
            'value': elem.value,
        }

    return [describe(elem) for elem in ds if needle in elem.keyword.lower()]

# Case-insensitive search: every element whose keyword contains "patient"
patient_tags = search_tags(ds, "patient")
print("TAGS CONTAINING 'PATIENT'", "=" * 60, sep="\n")
for t in patient_tags:
    print("{tag} {keyword}: {value}".format(**t))

In [None]:
# Same search, this time for the window (display preset) attributes
window_tags = search_tags(ds, "window")
print("\nTAGS CONTAINING 'WINDOW'", "=" * 60, sep="\n")
for t in window_tags:
    print("{tag} {keyword}: {value}".format(**t))

## 8. Comprehensive Metadata Report

In [None]:
def generate_dicom_report(ds):
    """Render the main metadata of *ds* as a multi-section text report.

    Attributes are read with getattr, so any missing one is shown as 'N/A'.
    The CT acquisition section is included only when Modality equals 'CT',
    and the pixel-spacing line only when PixelSpacing has a value.

    Returns:
        str: the report, sections separated by blank lines.
    """
    def field(keyword):
        # Attribute value, or the 'N/A' placeholder when it is absent
        return getattr(ds, keyword, 'N/A')

    rule = "=" * 60
    lines = [rule, "DICOM METADATA REPORT", rule]

    # Sections that are plain "Label: value" listings
    simple_sections = (
        ("PATIENT", (
            ("Name", 'PatientName'),
            ("ID", 'PatientID'),
            ("Birth Date", 'PatientBirthDate'),
            ("Sex", 'PatientSex'),
            ("Age", 'PatientAge'),
        )),
        ("STUDY", (
            ("Date", 'StudyDate'),
            ("Time", 'StudyTime'),
            ("Description", 'StudyDescription'),
            ("Institution", 'InstitutionName'),
            ("Referring Physician", 'ReferringPhysicianName'),
        )),
        ("SERIES", (
            ("Modality", 'Modality'),
            ("Description", 'SeriesDescription'),
            ("Body Part", 'BodyPartExamined'),
            ("Series Number", 'SeriesNumber'),
        )),
        ("EQUIPMENT", (
            ("Manufacturer", 'Manufacturer'),
            ("Model", 'ManufacturerModelName'),
            ("Station", 'StationName'),
        )),
    )
    for title, entries in simple_sections:
        lines.append(f"\n--- {title} ---")
        lines.extend(f"{label}: {field(keyword)}" for label, keyword in entries)

    # Acquisition (CT specific)
    if getattr(ds, 'Modality', '') == 'CT':
        lines += [
            "\n--- CT ACQUISITION ---",
            f"kVp: {field('KVP')}",
            f"mA: {field('XRayTubeCurrent')}",
            f"Exposure (mAs): {field('Exposure')}",
            f"Slice Thickness: {field('SliceThickness')} mm",
            f"Kernel: {field('ConvolutionKernel')}",
            f"FOV: {field('ReconstructionDiameter')} mm",
        ]

    # Image
    lines.append("\n--- IMAGE ---")
    lines.append(f"Size: {field('Rows')} x {field('Columns')}")
    spacing = getattr(ds, 'PixelSpacing', None)
    if spacing:
        lines.append(f"Pixel Spacing: {spacing[0]:.3f} x {spacing[1]:.3f} mm")
    lines += [
        f"Bits: {field('BitsStored')} stored / {field('BitsAllocated')} allocated",
        f"Instance Number: {field('InstanceNumber')}",
        f"Slice Location: {field('SliceLocation')} mm",
    ]

    lines.append("\n" + rule)
    return "\n".join(lines)

# Generate and print report
# (the report is a single newline-joined string, so one print() emits every section)
report = generate_dicom_report(ds)
print(report)

## Summary

What we learned:
1. DICOM tags are identified by **(Group, Element)** pairs
2. Access tags by **keyword**, **tuple**, or **Tag object**
3. Each tag has a **Value Representation (VR)** defining its type
4. **Modify** tags by direct assignment or DataElement
5. **Sequences** are nested datasets (arrays of objects)
6. Parse **dates/times** from DICOM format (YYYYMMDD, HHMMSS)
7. **Search** tags by keyword pattern