# Convert dicom to jpg

In [12]:
import csv
import os
from concurrent.futures import ProcessPoolExecutor, as_completed

import matplotlib.pyplot as plt
import cv2
import pydicom
from PIL import Image
import numpy as np
from torchvision import transforms
from pydicom.pixel_data_handlers.util import apply_voi_lut
import os
from tqdm import tqdm
import uuid
from datetime import datetime

TO_KEEP = [
    "PixelData",
    "file_meta",
    "BitsAllocated",
    "Rows",
    "Columns",
    "SamplesPerPixel",
    "PhotometricInterpretation",
    "PixelRepresentation",
    "BitsStored",
    "ImagePositionPatient",
    "PixelSpacing",
    "RescaleIntercept",
    "RescaleSlope",
    "WindowCenter",
    "WindowWidth",
    "Manufacturer",
    "PhotometricInterpretation",
    "SliceThickness",
    "ImageOrientationPatient",
    "VOILUTFunction",
    "VOILUTSequence",
    "PresentationLUTShape",
    "LUTExplanation",
    "Exposure",
    "ExposureControlMode",
    "ExposureControlModeDescription",
    "ExposureInuAs",
    "RelativeXRayExposure",
    "ExposuresOnPlate",
    "ExposureIndex",
    "TargetExposureIndex",
    "ExposureTimeInuS",
    "ExposuresOnDetectorSinceLastCalibration",
    "DetectorTimeSinceLastExposure",
    "TotalNumberOfExposures",
    "ExposureStatus",
    "ExposureTime",
    "ExposureInmAs",
    "ExposureModulationType",
    "KVP",
    "Laterality",
    "ImageLaterality",
    "RescaleIntercept",
    "RescaleSlope",
    "RescaleType",
    "XRayTubeCurrent",
    "XRayTubeCurrentInuA",
    "StudyDescription",
    "PatientSize",
    "ConvolutionKernel",
    "ViewPosition",
    "BodyPartExamined",
    "BurnedInAnnotation",
]
to_keep_set = set(TO_KEEP)


def convert_dicom_to_images(input_file_path, jpg_filename):
    dcm_file = pydicom.dcmread(input_file_path)
    attributes = [attribute for attribute in dcm_file.dir()]
    # try:
    #     assert all(attr in to_keep_set for attr in attributes)
    # except AssertionError:
    #     print(f"Error: Assertion failed for {jpg_filename} and {input_file_path}")
    #     return False
    # Check if the DICOM file has one of the required pixel data attributes
    if not any(hasattr(dcm_file, attr) for attr in ["PixelData", "FloatPixelData", "DoubleFloatPixelData"]):
        print(f"Missing pixel data in: {input_file_path}")
        return False
    # Further processing assuming 'PixelData' is present for simplicity
    try:
        pixel_array = dcm_file.pixel_array
        rescaled_image = cv2.convertScaleAbs(pixel_array, alpha=(255.0 / pixel_array.max()))
    except Exception as e:
        print(f"Failed to process pixel data in: {input_file_path}, Error: {e}")
        return False
    if dcm_file.PhotometricInterpretation == "MONOCHROME1":
        rescaled_image = cv2.bitwise_not(rescaled_image)
    adjusted_image = cv2.equalizeHist(rescaled_image)
    cv2.imwrite(jpg_filename, adjusted_image)
    return True


def compare_images(jpg_path1, jpg_path2):
    img1 = Image.open(jpg_path1)
    img_array1 = np.asarray(img1)
    img2 = Image.open(jpg_path2)
    img_array2 = np.asarray(img2)
    if img_array1.shape != img_array2.shape:
        print("The images have different dimensions.")
        return
    difference = np.abs(img_array1 - img_array2)
    assert np.all(difference == 0), (jpg_path1, jpg_path2)


def process_image(dcm_path, jpg_path, base_output_dir="processed_images"):
    unique_filename = f"{uuid.uuid4()}.jpg"
 #   unique_output_path = os.path.join(jpg_path)

    if not os.path.exists(jpg_path):
        os.makedirs(base_output_dir, exist_ok=True)

    if not convert_dicom_to_images(dcm_path, jpg_path):
        print(f"Failed to process {dcm_path}")




In [None]:
dcm_folder  ="/mnt/data2/datasets_lfay/physionet.org/files/vindr-pcxr/1.0.0/train"
jpg_folder= "/mnt/data2/datasets_lfay/MedImageInsights/data/vindr-pcxr/1.0.0/train"

if not os.path.exists(jpg_folder):
    os.makedirs(jpg_folder, exist_ok=True)

for dcm_path in tqdm(os.listdir(dcm_folder)):
    jpg_path = os.path.join(jpg_folder, dcm_path.replace(".dicom", ".jpg"))
    dcm_path = os.path.join(dcm_folder, dcm_path)

    try:
        process_image(dcm_path, jpg_path)
    except Exception as e:
        print(f"Failed to process {dcm_path}, Error: {e}")


In [14]:
dcm_folder  ="/mnt/data2/datasets_lfay/physionet.org/files/vindr-pcxr/1.0.0/test"
jpg_folder= "/mnt/data2/datasets_lfay/MedImageInsights/data/vindr-pcxr/1.0.0/test"

if not os.path.exists(jpg_folder):
    os.makedirs(jpg_folder, exist_ok=True)

for dcm_path in tqdm(os.listdir(dcm_folder)):
    jpg_path = os.path.join(jpg_folder, dcm_path.replace(".dicom", ".jpg"))
    dcm_path = os.path.join(dcm_folder, dcm_path)

    try:
        process_image(dcm_path, jpg_path)
    except Exception as e:
        print(f"Failed to process {dcm_path}, Error: {e}")


 71%|███████   | 987/1398 [00:56<00:21, 19.27it/s]

Failed to process /mnt/data2/datasets_lfay/physionet.org/files/vindr-pcxr/1.0.0/test/index.html, Error: File is missing DICOM File Meta Information header or the 'DICM' prefix is missing from the header. Use force=True to force reading.


100%|██████████| 1398/1398 [01:28<00:00, 15.85it/s]


In [23]:
print(len(os.listdir("/mnt/data2/datasets_lfay/MedImageInsights/data/vindr-pcxr/1.0.0/train")))
print(len(os.listdir("/mnt/data2/datasets_lfay/physionet.org/files/vindr-pcxr/1.0.0/train")))
print(len(os.listdir("/mnt/data2/datasets_lfay/MedImageInsights/data/vindr-pcxr/1.0.0/test")))
print(len(os.listdir("/mnt/data2/datasets_lfay/physionet.org/files/vindr-pcxr/1.0.0/test"))) 

7728
7729
1397
1398
