<h1 style="text-align: center;">Pre-Process 1: Crop Raw Image</h1>

In [12]:
import os
import sys
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image
from scipy.interpolate import interp1d

sys.path.append(os.path.join(os.getcwd(), '..'))
from cardio_modules.ImageTypes import (
    DOPPLER_SPECTROGRAM,
    STRAIN_MAPPING,
    DOPPLER_SPEC_PULSE_SIZE,
    DOPPLER_SPEC_ECG_SIZE,
    STRAIN_MAPPING_PULSE_SIZE,
    STRAIN_MAPPING_ECG_SIZE
)
from cardio_modules.PreProcess import PreProcessing

In [13]:
# Show the image names listed in DOPPLER_SPECTROGRAM (imported from
# cardio_modules.ImageTypes); the crop step below tests image names against it.
DOPPLER_SPECTROGRAM

['mv', 'pulmvein', 'trjet', 'tdilat', 'tdimed']

In [14]:
# Show the current working directory. Plain Python is used instead of the
# IPython `pwd` automagic so the cell also works when automagic is disabled
# or the notebook is exported to a script.
os.getcwd()

'/Users/niksun/Desktop/BCM_TCH_Cardiomyopathy_F23/tasks'

In [21]:
# Project root. Defaults to the parent of the current working directory — the
# same assumption the import cell makes when it appends '..' to sys.path — and
# can be overridden with the CARDIO_PROJECT_ROOT environment variable, so the
# notebook is not tied to one user's home directory.
GLOBAL_PATH = os.environ.get(
    'CARDIO_PROJECT_ROOT',
    os.path.abspath(os.path.join(os.getcwd(), '..')),
)

# Input locations: the raw data folder and the image export inside it.
RAW_DATA_PATH = os.path.join(GLOBAL_PATH, 'raw_data')
RAW_IMAGE_PATH = os.path.join(RAW_DATA_PATH, 'D2K - no DICOM')

# Output locations.
OUTPUT_PATH = os.path.join(GLOBAL_PATH, 'output')
PROCESSED_DATA_PATH = os.path.join(GLOBAL_PATH, 'processed_data')

In [16]:
# Collect the entries of the raw image folder whose names start with 'DDCM-',
# in sorted order, then print them together with their count.
patient_ids = sorted(
    entry
    for entry in os.listdir(RAW_IMAGE_PATH)
    if entry.startswith('DDCM-')
)
print(np.array(patient_ids))
print(len(patient_ids))

['DDCM-001' 'DDCM-002' 'DDCM-003' 'DDCM-004' 'DDCM-005' 'DDCM-006'
 'DDCM-007' 'DDCM-008' 'DDCM-009' 'DDCM-010' 'DDCM-011' 'DDCM-012'
 'DDCM-013' 'DDCM-014' 'DDCM-015' 'DDCM-016' 'DDCM-017' 'DDCM-018'
 'DDCM-019' 'DDCM-020' 'DDCM-021' 'DDCM-022' 'DDCM-023' 'DDCM-024'
 'DDCM-025' 'DDCM-026' 'DDCM-027' 'DDCM-028' 'DDCM-029' 'DDCM-030'
 'DDCM-031' 'DDCM-032' 'DDCM-034' 'DDCM-035' 'DDCM-036' 'DDCM-037'
 'DDCM-038' 'DDCM-052' 'DDCM-053' 'DDCM-054' 'DDCM-055' 'DDCM-056'
 'DDCM-057' 'DDCM-058' 'DDCM-059' 'DDCM-060' 'DDCM-061' 'DDCM-062'
 'DDCM-063' 'DDCM-064' 'DDCM-065' 'DDCM-066' 'DDCM-067' 'DDCM-068'
 'DDCM-069' 'DDCM-070' 'DDCM-075' 'DDCM-076' 'DDCM-077' 'DDCM-078'
 'DDCM-079' 'DDCM-082' 'DDCM-083' 'DDCM-085' 'DDCM-086' 'DDCM-087'
 'DDCM-088' 'DDCM-089' 'DDCM-090' 'DDCM-094' 'DDCM-096' 'DDCM-100'
 'DDCM-101' 'DDCM-102' 'DDCM-104' 'DDCM-105' 'DDCM-107' 'DDCM-108'
 'DDCM-109' 'DDCM-110' 'DDCM-112' 'DDCM-113' 'DDCM-114' 'DDCM-116'
 'DDCM-118' 'DDCM-119']
86


In [17]:
# Rebind the imported PreProcessing name to an instance; later cells call
# methods on this name. The isinstance guard keeps the cell re-runnable:
# once the name already holds an instance, calling it again would raise
# TypeError. NOTE(review): assumes PreProcessing is imported as a class.
if isinstance(PreProcessing, type):
    PreProcessing = PreProcessing()

In [18]:
# Build a mapping of patient id -> whatever get_patient_data returns for that
# patient (for 'DDCM-001' the printed result is a dict of image name -> PIL image).
all_patient_data = {
    pid: PreProcessing.get_patient_data(RAW_IMAGE_PATH, pid)
    for pid in patient_ids
}

print(len(all_patient_data))
print(all_patient_data['DDCM-001'])

86
{'bullet': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1016x708 at 0x108EE7DF0>, 'tdilat': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1016x708 at 0x108EE7FA0>, 'mv': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1016x708 at 0x12F8554E0>, 'trjet': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1016x708 at 0x12F855E70>, 'lasa2': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1016x708 at 0x108F1C880>, 'pulmvein': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1016x708 at 0x108F1D180>, 'lasa4': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1016x708 at 0x12EF3D420>, 'tdimed': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1016x708 at 0x12EA93FA0>}


In [19]:
# Crop every recognised image twice: once with its family's "pulse" box and
# once with its "ecg" box. Families are tried in the listed order (Doppler
# spectrograms first, then strain mappings); the first match wins.
crop_boxes_by_family = (
    (DOPPLER_SPECTROGRAM, DOPPLER_SPEC_PULSE_SIZE, DOPPLER_SPEC_ECG_SIZE),
    (STRAIN_MAPPING, STRAIN_MAPPING_PULSE_SIZE, STRAIN_MAPPING_ECG_SIZE),
)

all_patient_data_cropped = {}
for patient_id, patient_data in all_patient_data.items():
    cropped_images = {}
    for image_name, image_value in patient_data.items():
        for family, pulse_box, ecg_box in crop_boxes_by_family:
            if image_name not in family:
                continue
            pulse_part = PreProcessing.crop_image(image_value, pulse_box)
            ecg_part = PreProcessing.crop_image(image_value, ecg_box)
            cropped_images[image_name + '_pulse'] = pulse_part
            cropped_images[image_name + '_ecg'] = ecg_part
            break
        # Images in neither family (e.g. 'bullet') are skipped.
        # TODO: decide whether the bullet image should be used.
    all_patient_data_cropped[patient_id] = cropped_images

print(len(all_patient_data_cropped))
print(all_patient_data_cropped['DDCM-001'])


86
{'tdilat_pulse': <PIL.Image.Image image mode=RGB size=861x450 at 0x12F8DCB20>, 'tdilat_ecg': <PIL.Image.Image image mode=RGB size=861x108 at 0x12F887D30>, 'mv_pulse': <PIL.Image.Image image mode=RGB size=861x450 at 0x12F89D3F0>, 'mv_ecg': <PIL.Image.Image image mode=RGB size=861x108 at 0x12F89EF20>, 'trjet_pulse': <PIL.Image.Image image mode=RGB size=861x450 at 0x12F89DEA0>, 'trjet_ecg': <PIL.Image.Image image mode=RGB size=861x108 at 0x12EA90190>, 'lasa2_pulse': <PIL.Image.Image image mode=RGB size=612x564 at 0x12F89E410>, 'lasa2_ecg': <PIL.Image.Image image mode=RGB size=640x80 at 0x108EE7FD0>, 'pulmvein_pulse': <PIL.Image.Image image mode=RGB size=861x450 at 0x12EA90160>, 'pulmvein_ecg': <PIL.Image.Image image mode=RGB size=861x108 at 0x108EE7F70>, 'lasa4_pulse': <PIL.Image.Image image mode=RGB size=612x564 at 0x12F90F850>, 'lasa4_ecg': <PIL.Image.Image image mode=RGB size=640x80 at 0x12F90FA30>, 'tdimed_pulse': <PIL.Image.Image image mode=RGB size=861x450 at 0x12F90FAC0>, 'tdime

In [23]:
# Write each cropped image to PROCESSED_DATA_PATH/<patient_id>/<image_name>.jpg,
# creating the patient folder on demand.
for patient_id, patient_data in all_patient_data_cropped.items():
    patient_dir = os.path.join(PROCESSED_DATA_PATH, patient_id)
    for image_name, image_value in patient_data.items():
        os.makedirs(patient_dir, exist_ok=True)
        image_value.save(os.path.join(patient_dir, image_name + '.jpg'))
