In [None]:
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import numpy as np
from PIL import Image

def adjust_bits_stored(data, bits_stored_actual, bits_stored_meta):
    """Rescale ``data`` by a bit shift equal to the difference of two bit depths.

    Args:
        data: Value supporting ``>>`` and ``<<`` (e.g. an int or an integer array).
        bits_stored_actual: Bit depth the data currently has.
        bits_stored_meta: Bit depth the data should be brought to.

    Returns:
        ``data`` shifted right when the actual depth is larger, shifted left
        when it is smaller, and returned untouched when both depths agree.
    """
    delta = bits_stored_actual - bits_stored_meta

    # Equal depths: nothing to do.
    if delta == 0:
        return data

    # Actual depth is larger: drop the surplus low-order bits.
    if delta > 0:
        return data >> delta

    # Actual depth is smaller: scale up by the missing number of bits.
    return data << -delta

def load_dicom(path, target_bits=16):
    """Read a DICOM file and return its pixel data as an 8-bit NumPy array.

    The pixel data goes through ``apply_voi_lut``, is inverted when the
    photometric interpretation is ``MONOCHROME1``, optionally bit-shifted, and
    finally min-max scaled to the 0..255 range.

    Args:
        path: Location of the DICOM file.
        target_bits: Bit depth the data is shifted to when the dataset carries
            a ``BitsStored`` tag and ``BitsAllocated`` differs from this value.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8``.

    Raises:
        AttributeError: If the dataset has no ``PixelData`` element.
    """
    # ``dcmread`` is the supported reader; ``read_file`` is only a deprecated
    # alias of it in current pydicom releases.
    dicom = pydicom.dcmread(path, force=True)

    if 'PixelData' not in dicom:
        # Raise an error if the DICOM file does not contain pixel data
        raise AttributeError("The DICOM file doesn't contain pixel data.")

    # Apply VOI LUT to the DICOM image data
    data = apply_voi_lut(dicom.pixel_array, dicom)

    # Invert MONOCHROME1 images to match the usual black background convention.
    # With force=True the tag is not guaranteed to exist, so look it up safely.
    if getattr(dicom, 'PhotometricInterpretation', None) == "MONOCHROME1":
        data = np.amax(data) - data

    # Determine the actual and target bit depth
    bits_stored_actual = dicom.BitsAllocated
    bits_stored_meta = target_bits if 'BitsStored' in dicom else bits_stored_actual

    # Bit shifts are only defined for integer arrays; the array coming out of
    # apply_voi_lut may be floating point, in which case the shift is skipped
    # (the min-max scaling below makes a pure rescale redundant anyway).
    # Widening to int64 first keeps a left shift from overflowing narrow dtypes.
    # NOTE(review): a right shift discards low-order bits before normalisation;
    # confirm that precision loss is intended for >16-bit allocations.
    if bits_stored_actual != bits_stored_meta and np.issubdtype(data.dtype, np.integer):
        data = adjust_bits_stored(data.astype(np.int64), bits_stored_actual, bits_stored_meta)

    # Normalize and convert the data to 8-bit
    data = data.astype(float)
    lowest = np.min(data)
    value_range = np.max(data) - lowest
    if value_range > 0:
        data = (data - lowest) / value_range
    else:
        # Constant image: avoid 0/0 (NaN) and emit an all-black frame instead.
        data = np.zeros_like(data)

    return (data * 255).astype(np.uint8)

def save_as_png(image, png_path):
    """Write a NumPy image array to ``png_path`` using Pillow.

    Args:
        image: Array handed to ``Image.fromarray``.
        png_path: Destination path; ``save`` is called without an explicit
            format argument.
    """
    Image.fromarray(image).save(png_path)


In [None]:
import os

def dicom_to_png(dicom_folder, png_folder):
    """Convert every ``.dicom`` file in ``dicom_folder`` to a PNG in ``png_folder``.

    Files are matched on their extension case-insensitively. A file that fails
    to convert is reported with ``print`` and does not stop the remaining
    conversions.

    Args:
        dicom_folder: Directory that is listed for ``.dicom`` files.
        png_folder: Directory that receives ``<stem>.png`` for each input file.
    """
    # Sorted so the processing (and log) order is the same on every run.
    for filename in sorted(os.listdir(dicom_folder)):
        # Split once so that the extension test and the output name agree:
        # a case-sensitive str.replace would leave 'X.DICOM' unchanged and
        # would also rewrite '.dicom' occurring in the middle of a name.
        stem, extension = os.path.splitext(filename)
        if extension.lower() != '.dicom':
            continue

        dicom_path = os.path.join(dicom_folder, filename)
        png_path = os.path.join(png_folder, stem + '.png')

        try:
            # Convert the DICOM file to a numpy array
            image = load_dicom(dicom_path)
            # Save the numpy array as a PNG file
            save_as_png(image, png_path)
            print(f"Converted '{dicom_path}' to '{png_path}'")
        except Exception as e:
            # Best effort: report the failure and carry on with the next file
            print(f"Failed to convert '{dicom_path}': {e}")

# Input and output locations for the DICOM -> PNG conversion step.
# NOTE(review): both are absolute paths on one machine; adjust them before
# running this notebook anywhere else.
dicom_folder = '/Users/timcheng/Downloads/vinbigdata-chest-xray-abnormalities-detection/train'
png_folder = '/Users/timcheng/PycharmProjects/pythonProject/ai/train_png'

# Create the PNG folder if it doesn't exist (exist_ok=True makes re-runs safe)
os.makedirs(png_folder, exist_ok=True)

# Convert all '.dicom' files found in dicom_folder and write the PNGs to png_folder
dicom_to_png(dicom_folder, png_folder)


In [None]:
import pandas as pd
import os
from PIL import Image

# Annotation table, the PNG images it refers to, and where label files go.
# NOTE(review): absolute, machine-specific paths.
csv_file_path = '/Users/timcheng/Downloads/vinbigdata-chest-xray-abnormalities-detection/train.csv'
images_path = '/Users/timcheng/PycharmProjects/pythonProject/ai/train_png'
labels_path = images_path

df = pd.read_csv(csv_file_path)

# The CSV holds one row per annotation, so the same image_id can occur on
# several rows. Rows are grouped per image and each label file is written
# exactly once; opening the file in 'w' mode per row would keep only the
# last annotation of every image.
for image_id, image_rows in df.groupby('image_id', sort=False):
    # Creating a file path for the corresponding .txt annotation file
    txt_file_path = os.path.join(labels_path, image_id + '.txt')

    label_lines = []

    # Handling the 'No finding' case (class 14)
    # NOTE(review): '14 0 0 1 1' is kept as-is; read as YOLO
    # "class x_center y_center w h" this box is centred on the top-left
    # corner - confirm this is what the training pipeline expects.
    no_finding = image_rows['class_id'] == 14
    if no_finding.any():
        label_lines.append('14 0 0 1 1\n')

    # Rows that carry a real bounding box
    box_rows = image_rows[~no_finding & image_rows['x_min'].notna()]
    if not box_rows.empty:
        # Retrieving image size once per image instead of once per row
        image_path = os.path.join(images_path, image_id + '.png')
        if not os.path.exists(image_path):
            # The conversion step tolerates failed files, so a PNG may be
            # missing; report it and move on rather than aborting the loop.
            print(f"Skipping '{image_id}': image not found at '{image_path}'")
            continue
        with Image.open(image_path) as img:
            width, height = img.size

        for _, row in box_rows.iterrows():
            # Calculating YOLO format coordinates (normalised to the image size)
            x_center = ((row['x_min'] + row['x_max']) / 2) / width
            y_center = ((row['y_min'] + row['y_max']) / 2) / height
            w = (row['x_max'] - row['x_min']) / width
            h = (row['y_max'] - row['y_min']) / height

            # int() guarantees an integer class index even if pandas parsed
            # the column as float
            label_line = f"{int(row['class_id'])} {x_center} {y_center} {w} {h}\n"

            # Drop exact duplicates of an already recorded box
            if label_line not in label_lines:
                label_lines.append(label_line)

    # Writing all label lines of this image to its .txt file in one go
    with open(txt_file_path, 'w') as file:
        file.writelines(label_lines)


In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Directory holding the .png images and their .txt labels to be split.
# NOTE(review): the earlier cells of this notebook write PNGs and labels to
# '.../ai/train_png', whereas this cell reads '.../ai' - confirm the files
# really live here before running.
images_path = '/Users/timcheng/PycharmProjects/pythonProject/ai'
labels_path = images_path

# Collecting all .png image files. os.listdir returns names in arbitrary
# order, so sort them: otherwise random_state alone does not make the split
# reproducible.
image_files = sorted(img for img in os.listdir(images_path) if img.endswith('.png'))

# Splitting the dataset (22% train, 5% test, 3% validate, remaining 70% unused)
# 1) keep 30% of all images for train/test/validate, park the other 70% in 'rest'
rest_images, train_val_images = train_test_split(image_files, test_size=0.30, random_state=42)
# 2) of that 30%, hold out 8 percentage points (5% test + 3% validate),
#    leaving 22% for training
train_images, test_val_images = train_test_split(train_val_images, test_size=(0.08/0.30), random_state=42)
# 3) split the held-out 8% into 5% test and 3% validate
test_images, val_images = train_test_split(test_val_images, test_size=(0.03/0.08), random_state=42)

# Creating directories for train, validate, test, and rest
train_dir = os.path.join(images_path, 'train')
val_dir = os.path.join(images_path, 'val')
test_dir = os.path.join(images_path, 'test')
rest_dir = os.path.join(images_path, 'rest')
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)
os.makedirs(rest_dir, exist_ok=True)

# Function to move files
def move_files(file_list, source_dir, target_dir):
    """Move each listed image, plus its same-named ``.txt`` label, to ``target_dir``.

    Args:
        file_list: File names (not paths) located in ``source_dir``.
        source_dir: Directory the files are currently in.
        target_dir: Directory the files are moved into.

    A label file is moved only when it exists; images without one are moved
    on their own.
    """
    for file in file_list:
        shutil.move(os.path.join(source_dir, file), target_dir)

        # Swap only the final extension; str.replace would rewrite every
        # '.png' occurring in the name and then miss the label file.
        txt_file = os.path.splitext(file)[0] + '.txt'
        txt_source = os.path.join(source_dir, txt_file)
        if os.path.exists(txt_source):
            shutil.move(txt_source, target_dir)

# Relocate every subset (image plus matching label) into its own directory
for subset, destination in (
    (train_images, train_dir),
    (val_images, val_dir),
    (test_images, test_dir),
    (rest_images, rest_dir),
):
    move_files(subset, images_path, destination)
