In [60]:
import pydicom
import os
import numpy as np

## Read in files from disk

In [9]:
dir = "harvard-chest-ct/98.12.2"
files = os.listdir(dir)

instances = [pydicom.dcmread(os.path.join(dir, file)) for file in files]
print("Number of instances:", len(instances))

Number of instances: 427


In [13]:
print(instances[0])

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 210
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: CT Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 1.3.6.1.4.1.9590.100.1.2.320298050811801098832515646773789048894
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1.3.6.1.4.1.9590.100.1.3.100.9.4
(0002, 0013) Implementation Version Name         SH: 'MATLAB IPT 9.4'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0008) Image Type                          CS: ['ORIGINAL', 'PRIMARY', 'AXIAL', 'HELICAL']
(0008, 0012) Instance Creation Date              DA: '20200311'
(0008, 0013) Instance Creation Time              TM: '113914.171'
(0008, 0016) SOP Class UID                       UI: CT Image 

In [43]:
print("Element name:", instances[0][0x0020, 0x000e].name)
print("Element tag:", instances[0][0x0020, 0x000e].tag)
print("Element VR:", instances[0][0x0020, 0x000e].VR)
print("Element Value:", instances[0][0x0020, 0x000e].value)
print("----")
print("Element name:", instances[0][0x0020, 0x0010].name)
print("Element tag:", instances[0][0x0020, 0x0010].tag)
print("Element VR:", instances[0][0x0020, 0x0010].VR)
print("Element Value:", instances[0][0x0020, 0x0010].value)

Element name: Series Instance UID
Element tag: (0020, 000e)
Element VR: UI
Element Value: 1.2.156.14702.1.1000.16.1.2020031111365293700020003
----
Element name: Study ID
Element tag: (0020, 0010)
Element VR: SH
Element Value: 93725


In [45]:
dicom_dataset = {}
for instance in instances:
    if instance.StudyID not in dicom_dataset:
        # new study
        dicom_dataset[instance.StudyID] = {}
        
    if instance.StudyID in dicom_dataset:
        # part of existing study
        
        if instance.SeriesInstanceUID in dicom_dataset[instance.StudyID]:
            # part of existing series
            dicom_dataset[instance.StudyID][instance.SeriesInstanceUID] += [instance]
        else:
            # new series
            dicom_dataset[instance.StudyID][instance.SeriesInstanceUID] = [instance]

        
    

In [54]:
print("List of Studies:", list(dicom_dataset.keys()))
print("List of Series:", list(dicom_dataset["93725"].keys()))

List of Studies: ['93725']
List of Series: ['1.2.156.14702.1.1000.16.1.2020031111365293700020003', '1.2.156.14702.1.1000.16.1.2020031111365289000020001', '1.2.156.14702.1.1000.16.1.2020031111365295300020004', '1.2.156.14702.1.1000.16.1.2020031111365290600020002', '1.2.156.14702.1.1000.16.1.2020031111360523400010001', '1.2.156.14702.1.1000.16.1.20200311114023234000110001']


## Pixel Data

In [57]:
print(instance.PhotometricInterpretation)

MONOCHROME2


In [58]:
print(instance.PixelData)

b'\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x00\x19\x003\x00\x15\x00\x05\x00b\x00\x14\x00\x00\x00\x01\x00\t\x00\'\x00.\x00\x1a\x00\x1b\x00(\x00%\x008\x00/\x00\x00\x00\x00\x00%\x00!\x00\t\x00\x15\x00 \x002\x00!\x006\x00\x1c\x00\x08\x00\x1c\x00\x1d\x00(\x00)\x00\x14\x00\t\x00\x12\x00\x14\x006\x00*\x00\x11\x00\x00\x00\x02\x00;\x00;\x00\x17\x00\x1e\x00\x00\x00\x11\x00\x1f\x00\x18\x00\x07\x009\x00H\x00.\x00!\x00\x17\x00!\x00\x1c\x00\x00\x00\x10\x00!\x006\x00\x00\x00\x00\x00\t\x007\x00"\x00\x00\x00\x1b\x00;\x0

In [61]:
print("The pixel array:", instance.pixel_array)
print("Unique values in the pixel array:", np.unique(instance.pixel_array))

[[25 25 25 ... 25 25 25]
 [25 25 25 ... 25 25 25]
 [25 25 25 ... 25 25 25]
 ...
 [25 25 25 ... 25 25 25]
 [25 25 25 ... 25 25 25]
 [25 25 25 ... 25 25 25]]
[   0    1    2 ... 3641 3703 3761]
