In [16]:
import torch
from pathlib import Path
from pydicom import dcmread
from PIL import Image
import os
import yaml

In [None]:
# Collect the DICOM file paths of the train and test samples as lists.

data_file = Path('data/pneumonia_dataset')

# Path.glob yields entries in an arbitrary, filesystem-dependent order.
# Later cells name the converted images by list position (img1.jpg,
# img2.jpg, ...), so the lists are sorted to keep the position -> source
# file mapping identical across runs and machines.
train_path = sorted(data_file.glob('stage_2_train_images/*.dcm'))
test_path = sorted(data_file.glob('stage_2_test_images/*.dcm'))
print(f'Train Data Samples: {len(train_path)}')
print(f'Test Data Samples: {len(test_path)}')

Train Data Samples: 26684
Test Data Samples: 3000


In [None]:
def dcm_to_jpg(dcm_path, save_path):
    '''
    Convert a DICOM (.dcm) file to a JPEG (.jpg) file and save it.

    Reads the DICOM file at `dcm_path`, takes its pixel data, rescales it to
    8-bit when it is not already 8-bit, and writes the result as a JPEG at
    `save_path`. Missing parent directories of `save_path` are created.
    If `save_path` is an existing directory, nothing is read or written.

    Args:
        dcm_path (str | Path): File path of the input DICOM file.
        save_path (str | Path): File path to save the output JPEG file.

    Returns:
        None
    '''
    save_path = Path(save_path)

    # A directory cannot be used as an output file; keep the silent no-op.
    if save_path.is_dir():
        return

    pixels = dcmread(dcm_path).pixel_array

    # Pillow's JPEG writer does not accept 16-bit image modes, so anything
    # that is not already uint8 is min-max rescaled into the 0..255 range.
    # uint8 input is passed through untouched.
    # NOTE(review): this is a plain linear rescale; it does not apply any
    # DICOM windowing (VOI LUT) - confirm that is acceptable for training.
    if pixels.dtype != 'uint8':
        scaled = pixels.astype('float64')
        low = scaled.min()
        span = scaled.max() - low
        if span > 0:
            scaled = (scaled - low) / span * 255.0
        else:
            # Constant image: avoid dividing by zero, emit all zeros.
            scaled = scaled - low
        pixels = scaled.round().astype('uint8')

    # Make the function usable even if the target folder was not created yet.
    save_path.parent.mkdir(parents=True, exist_ok=True)
    Image.fromarray(pixels).save(save_path, 'JPEG')


In [None]:
# Layout of the YOLO dataset folder:
#   [0] root directory name
#   [1] entries inside the root (two sub-directories, then the config file)
#   [2] dataset splits created inside each sub-directory

yolo_file_format = ['YOLO_format_data', ['images', 'labels', 'data.yaml'], ['train', 'val']]

# Build <root>/<images|labels>/<train|val>; the third entry of [1] is the
# yaml file name, not a directory, so only the first two entries are used.
for content_dir in yolo_file_format[1][:2]:
    for split_dir in yolo_file_format[2]:
        form_path = Path(yolo_file_format[0]) / content_dir / split_dir
        form_path.mkdir(parents=True, exist_ok=True)

# Location of the data.yaml file inside the root directory
yaml_file = Path(yolo_file_format[0]) / yolo_file_format[1][2]

# Settings written to data.yaml: image folder of each split, the number of
# classes, and the class names
yaml_content = {
    'train': '/'.join((yolo_file_format[1][0], yolo_file_format[2][0])),
    'val': '/'.join((yolo_file_format[1][0], yolo_file_format[2][1])),
    'nc': 1,
    'names': ['pneumonia']
}

# Serialize the settings into data.yaml
with yaml_file.open('w') as file:
    yaml.dump(yaml_content, file)

In [20]:
# Convert every training DICOM into a JPEG inside the images/train directory

train_yolo_img = Path('YOLO_format_data/images/train')

# Output names are position-based: img1.jpg, img2.jpg, ...
train_image_name_list = [f'img{number}.jpg' for number in range(1, len(train_path) + 1)]

for dicom_file, image_name in zip(train_path, train_image_name_list):
    dcm_to_jpg(dcm_path=dicom_file, save_path=os.path.join(train_yolo_img, image_name))



In [None]:
# Convert every test DICOM into a JPEG inside the images/val directory
# NOTE(review): the val split is filled from stage_2_test_images; confirm
# that labels exist for these files before using them for validation.

val_yolo_img = Path('YOLO_format_data/images/val')

# Continue the numbering where the training images stopped
cont = len(train_image_name_list)
val_image_name_list = [f'img{cont + offset}.jpg' for offset in range(1, len(test_path) + 1)]

for dicom_file, image_name in zip(test_path, val_image_name_list):
    dcm_to_jpg(dcm_path=dicom_file, save_path=os.path.join(val_yolo_img, image_name))