# Convert DICOM files to .png for easier data processing

In [None]:
import os
import pydicom
import numpy as np
from PIL import Image
import shutil

# Folder holding the raw DICOM files, and the folder the PNGs are written to.
input_dir = r"D:\vinbigdata-chest-xray-abnormalities-detection\train"
output_dir = r"D:\vinbigdata-chest-xray-abnormalities-detection\train_png"

# Create the target folder up front; an already-existing folder is not an error.
os.makedirs(output_dir, exist_ok=True)

# Keep only the directory entries whose name ends in ".dicom", ignoring case.
dicom_files = list(
    filter(lambda name: name.lower().endswith(".dicom"), os.listdir(input_dir))
)

def convert_dicom_to_png(dicom_file):
    """Convert one DICOM file from ``input_dir`` into an 8-bit RGB PNG in ``output_dir``.

    The pixel data is inverted when the file declares ``MONOCHROME1``, min-max
    scaled to 0-255 unless it is already ``uint8``, converted to RGB and saved
    under the same base name with a ``.png`` extension.

    Args:
        dicom_file: File name (not a full path) of a DICOM file located in the
            module-level ``input_dir``.

    Returns:
        None. Any exception raised while processing is caught and reported
        with ``print`` so that one bad file does not abort a batch run.
    """
    try:
        dicom_path = os.path.join(input_dir, dicom_file)
        # splitext swaps the extension whatever its letter case (".dicom",
        # ".DICOM", ...), matching the case-insensitive file listing.
        base_name, _ = os.path.splitext(dicom_file)
        output_path = os.path.join(output_dir, base_name + ".png")

        dicom = pydicom.dcmread(dicom_path)
        image = dicom.pixel_array  # Get pixel data

        # Check the Photometric Interpretation --- this is due to the dataset having inversions!
        photometric_interpretation = dicom.get("PhotometricInterpretation", "UNKNOWN")
        if photometric_interpretation == "MONOCHROME1":
            # Bitwise NOT reverses the intensity order of integer pixels; the
            # min-max scaling below then maps the result onto 0-255.
            image = np.invert(image)

        if image.dtype != np.uint8:
            # Scale in float64: doing the subtraction in the original integer
            # dtype can wrap around for signed data (e.g. int16 spanning
            # negative and positive values).
            image = image.astype(np.float64)
            lowest = image.min()
            highest = image.max()
            if highest > lowest:
                image = (image - lowest) / (highest - lowest) * 255
            else:
                # Constant image: there is no range to stretch, and dividing
                # by zero would produce NaNs. Emit an all-black image instead.
                image = np.zeros_like(image)
            image = image.astype(np.uint8)
        image_rgb = Image.fromarray(image).convert("RGB")
        image_rgb.save(output_path, "PNG")
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller continue.
        print(f"Error processing {dicom_file}: {e}")

# Convert every discovered file in turn. convert_dicom_to_png catches and
# prints its own errors, so a failing file does not stop the loop.
for dicom_file in dicom_files:
    convert_dicom_to_png(dicom_file)

# Printed once the loop has finished, regardless of any per-file errors.
print("Done!")



📂 Checking DICOM files in: D:\vinbigdata-chest-xray-abnormalities-detection\train
🔍 Found 15000 DICOM files.
🎉 Conversion completed!


# Access Metadata

In [None]:
import pydicom

# Path of a single training-set DICOM file to inspect.
dicom_file = r"D:\vinbigdata-chest-xray-abnormalities-detection\train\01a1e85c56486eccd29f3e824e745c2a.dicom"
# Parse the file into a pydicom dataset.
dicom = pydicom.dcmread(dicom_file)
# Printing the dataset dumps its elements as text: the file meta header first,
# then the data elements.
print(dicom)


Dataset.file_meta -------------------------------
(0002,0000) File Meta Information Group Length  UL: 166
(0002,0001) File Meta Information Version       OB: b'\x00\x01'
(0002,0002) Media Storage SOP Class UID         UI: Digital X-Ray Image Storage - For Presentation
(0002,0003) Media Storage SOP Instance UID      UI: 01a1e85c56486eccd29f3e824e745c2a
(0002,0010) Transfer Syntax UID                 UI: JPEG 2000 Image Compression (Lossless Only)
(0002,0012) Implementation Class UID            UI: 1.2.3.4
(0002,0013) Implementation Version Name         SH: 'INF_3.9'
(0002,0016) Source Application Entity Title     AE: 'TITAN'
-------------------------------------------------
(0010,0040) Patient's Sex                       CS: 'M'
(0010,1010) Patient's Age                       AS: '000Y'
(0028,0002) Samples per Pixel                   US: 1
(0028,0004) Photometric Interpretation          CS: 'MONOCHROME2'
(0028,0008) Number of Frames                    IS: '1'
(0028,0010) Rows           

  warn_and_log(msg)


# Resize the large PNG images to 224x224


In [None]:
import os
from PIL import Image
from skimage.transform import resize
import numpy as np

# Define source and destination directories
source_dir = r"D:\vinbigdata-chest-xray-abnormalities-detection\train_png"
destination_dir = r"D:\vinbigdata-chest-xray-abnormalities-detection\train_png_224"

# exist_ok=True keeps the cell safe to re-run and avoids a separate
# existence check (same pattern as the DICOM conversion cell).
os.makedirs(destination_dir, exist_ok=True)

for filename in os.listdir(source_dir):
    # Process only PNG images; match the extension case-insensitively, like
    # the ".dicom" filter in the conversion cell.
    if not filename.lower().endswith(".png"):
        continue

    img_path = os.path.join(source_dir, filename)

    # Open inside a context manager so the file handle is released as soon as
    # the pixels have been copied into a numpy array, instead of accumulating
    # one open handle per image over the whole directory.
    with Image.open(img_path) as img:
        # Convert image to numpy array for resizing
        img_array = np.array(img)

    # Only rows and columns are given; per the scikit-image docs any trailing
    # channel axis is preserved. The code relies on resize returning floats
    # in [0, 1] for integer input, hence the * 255 below.
    resized_array = resize(img_array, (224, 224), anti_aliasing=True)

    # Convert back to PIL Image and save under the same file name
    resized_img = Image.fromarray((resized_array * 255).astype(np.uint8))
    resized_img.save(os.path.join(destination_dir, filename))

print("Resizing complete. All images saved to:", destination_dir)


Resizing complete. All images saved to: D:\vinbigdata-chest-xray-abnormalities-detection\train_png_224
