# Environment

In [None]:
import pandas as pd
import threading
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import logging
import pydicom
import numpy as np
from pydicom.pixel_data_handlers.util import apply_voi_lut
from PIL import Image
from tqdm import tqdm
import settings as settings

# Input/output locations, taken from the project settings module
base_input_path = Path(settings.base_input_path)
base_output_path = Path(settings.base_output_path)
output_tables_path = Path(settings.output_tables_path)

# Metadata table, read from the CSV referenced in the settings module
metadata_keys = pd.read_csv(settings.metadata_keys_path)

# Segments named in the metadata, and the input sub-folders matching them
# (mapping: folder name -> folder path)
unique_segments = {*metadata_keys['Segment']}
folder_dict = {
    segment_dir.name: segment_dir
    for segment_dir in base_input_path.iterdir()
    if segment_dir.is_dir() and segment_dir.name in unique_segments
}

# Number of .dc3 files (suffix compared case-insensitively) in the matched folders
total_files = sum(
    dicom_path.suffix.lower() == '.dc3'
    for segment_dir in folder_dict.values()
    for dicom_path in segment_dir.iterdir()
)

# Keep only the metadata rows whose segment folder was found on disk
# (allows running against a reduced set of folders while testing)
metadata_keys_filtered = metadata_keys.loc[metadata_keys['Segment'].isin(list(folder_dict))]
unique_segments_filtered = {*metadata_keys_filtered['Segment']}


# Functions

In [None]:
# Function to convert DICOM file into JPG file
def convert_d3c_to_jpg(d3c_file_path, jpg_file_path, voi_lut=True):
    """Convert a single DICOM file into an 8-bit JPEG image.

    The pixel data is optionally passed through ``apply_voi_lut``, rescaled
    to the 0-255 range, inverted when the photometric interpretation is
    ``MONOCHROME1`` and written as a JPEG with quality 95.

    Args:
        d3c_file_path: Path of the DICOM file to read.
        jpg_file_path: Path of the JPEG file to write.
        voi_lut: When true, apply the VOI LUT of the dataset (if the device
            stored one) to emphasise the diagnostically relevant range of
            pixel values before rescaling.

    Returns:
        A list that is empty on success and contains ``d3c_file_path`` when
        the conversion failed. Failures are logged and never raised.
    """
    image_errors = []
    try:
        ds = pydicom.dcmread(d3c_file_path)
        pixels = apply_voi_lut(ds.pixel_array, ds) if voi_lut else ds.pixel_array

        # Normalise in floating point: subtracting the minimum in the original
        # integer dtype can wrap around for wide-range signed pixel data.
        pixels = np.asarray(pixels, dtype=np.float64)
        pixels = pixels - pixels.min()
        peak = pixels.max()
        # A constant image has zero range; skip the division so the result is
        # a valid all-zero image instead of NaN cast to uint8.
        if peak > 0:
            pixels = pixels / peak
        data = (pixels * 255).astype(np.uint8)

        # MONOCHROME1 stores the minimum value as white, so flip the grey
        # scale around the fixed 8-bit ceiling.
        if ds.PhotometricInterpretation == "MONOCHROME1":
            data = 255 - data

        # NOTE: histogram equalisation (e.g. cv2.equalizeHist) could be
        # applied here to improve contrast; it is currently not used.

        # Create an image and save it in JPG format
        im = Image.fromarray(data)
        im.save(jpg_file_path, "JPEG", quality=95)
    except Exception as e:
        # Per-file boundary: a single unreadable file must not stop the batch.
        logging.error("Error processing file %s: %s", d3c_file_path, str(e))
        # Add the file name to the error list
        image_errors.append(d3c_file_path)
    return image_errors

# Function to process DICOM, convert them into JPG and save them using a specific folder system
def process_and_convert_files(dataframe, max_workers=24):
    """Convert every DICOM file listed in ``dataframe`` to JPEG in parallel.

    For each row the source file ``<SOPInstanceUID>.dc3`` is looked up in the
    input folder registered for the row's ``Segment`` and written to
    ``base_output_path / patient_group_folder / patient_deid_folder /
    Segment_deid / <SOPInstanceUID_deid>.jpg``. Rows that were converted and
    rows that failed are written to ``processed_images.csv`` and
    ``conversion_errors.csv`` inside ``output_tables_path``.

    Args:
        dataframe: Metadata table with the columns ``Segment``,
            ``patient_group_folder``, ``patient_deid_folder``,
            ``Segment_deid``, ``SOPInstanceUID_deid`` and ``SOPInstanceUID``.
        max_workers: Number of worker threads used for the conversion.

    Returns:
        None. The results are written to the two CSV files and the list of
        failed rows is printed.
    """
    image_errors = []
    processed_images = []

    def convert_row(row):
        """Convert the file described by one metadata row; return True on success."""
        segment = row['Segment']
        file_name = row['SOPInstanceUID']
        file_name_deid = row['SOPInstanceUID_deid']

        # A segment without a registered input folder is a per-row failure,
        # not a reason to abort the whole batch.
        source_folder = folder_dict.get(segment)
        if source_folder is None:
            logging.error("No input folder found for segment %s", segment)
            return False

        dest_folder = (
            base_output_path
            / row['patient_group_folder']
            / row['patient_deid_folder']
            / row['Segment_deid']
        )
        dest_folder.mkdir(parents=True, exist_ok=True)

        d3c_file_path = source_folder / f"{file_name}.dc3"
        if not d3c_file_path.exists():
            logging.error("File not found: %s", d3c_file_path)
            return False

        jpg_file_path = dest_folder / f"{file_name_deid}.jpg"
        # The converter returns a non-empty list when the conversion failed.
        return not convert_d3c_to_jpg(d3c_file_path, jpg_file_path)

    # The bar advances once per row, so its total is the number of rows.
    with tqdm(total=len(dataframe), desc="Processing files", unit="file") as pbar, \
            ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(convert_row, row): row for _, row in dataframe.iterrows()}
        # Results are gathered in this thread only, so no lock is required
        # around the result lists or the progress bar.
        for future in as_completed(futures):
            row = futures[future]
            try:
                succeeded = future.result()
            except Exception:
                # Record unexpected failures so the result tables are still written.
                logging.exception("Unexpected error while processing row %s", row.name)
                succeeded = False
            if succeeded:
                processed_images.append(row)
            else:
                image_errors.append(row)
            pbar.update(1)

    # Create dataframes from the lists of errors and processed images; passing
    # the columns keeps the CSV header even when a list is empty.
    errors_df = pd.DataFrame(image_errors, columns=dataframe.columns)
    processed_images_df = pd.DataFrame(processed_images, columns=dataframe.columns)

    # Save the dataframes to CSV files (creating the output folder if needed)
    output_tables_path.mkdir(parents=True, exist_ok=True)
    errors_df.to_csv(output_tables_path / "conversion_errors.csv", index=False)
    processed_images_df.to_csv(output_tables_path / "processed_images.csv", index=False)

    print(image_errors)


# Main

In [None]:
# Send log records of level ERROR and above to a log file
logging.basicConfig(
    level=logging.ERROR,
    filename='conversion_errors.log',
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Run the conversion over the filtered metadata table
process_and_convert_files(dataframe=metadata_keys_filtered)
