This notebook processes DICOM (Digital Imaging and Communications in Medicine) files in batch to redact sensitive information. It first runs the local `presidio_image_redactor` engine over a directory, then sends the files one by one to a DICOM processing service via HTTP requests. Summary of what each cell does:

1. **Cell 2**: Imports necessary libraries and modules, including `requests`, `json`, `os`, `pydicom`, `matplotlib.pyplot`, and `presidio_image_redactor`.
2. **Cell 3**: Loads a specific DICOM file using `pydicom.dcmread`.
3. **Cell 4**: Defines input and output paths for DICOM files and initializes the `DicomImageRedactorEngine`.
4. **Cell 5**: Contains a markdown cell indicating the start of a performance test for batch processing.
5. **Cell 6**: Redacts text PHI (Protected Health Information) from DICOM images in a directory and measures the time taken for this process.
6. **Cell 7**: Calculates the time taken for the redaction process in microseconds.
7. **Cell 8**: Converts the time taken for the redaction process to seconds.
8. **Cell 9**: Defines a function `process_dicom_files` to iterate through a folder, process DICOM files using an API, and save the redacted results to a local folder; it then runs the function on the `dicom` folder, recording a timestamp before and after the call.
9. **Cell 10**: Converts the time taken by that `process_dicom_files` run to seconds.
10. **Cell 11**: Displays the time taken for the `process_dicom_files` function to run.
11. **Cell 12**: Displays the shape of the pixel array of the loaded DICOM instance.
12. **Cell 13**: Plots the pixel array of the loaded DICOM instance using `matplotlib`.
13. **Cell 14**: Runs the `process_dicom_files` function on a different input folder and measures the time taken.
14. **Cell 15**: Displays the time taken for the batch processing in seconds.

Overall, the notebook is designed to load DICOM files, redact sensitive information, measure the performance of these operations, and visualize the DICOM images.

In [None]:
import requests
import json
import os
import pydicom
from pydicom.dataelem import DataElement
import base64
import glob
from pathlib import Path
import matplotlib.pyplot as plt
from presidio_image_redactor import DicomImageRedactorEngine
import datetime

In [None]:
# Read a single sample DICOM instance from disk; its pixel data is inspected
# and plotted in later cells.
dicom_instance = pydicom.dcmread(
    'dicom/manifest-1617826161202/Pseudo-PHI-DICOM-Data/Pseudo-PHI-001/06-26-2003-NA-XR CHEST AP PORTABLE-96544/1002.000000-NA-96023/1-1.dcm'
)


In [None]:
# Redaction engine used for the local (in-process) batch run.
engine = DicomImageRedactorEngine()

# Source location: either one DICOM (.dcm) file or a directory of DICOM files.
input_path = 'dicom/'

# Destination directory that receives the output.
output_parent_dir = 'output/'

# Performance Test Batch Processing

In [None]:
# Timed run: redact text PHI from the DICOM images found under input_path.
time1 = datetime.datetime.now()
engine.redact_from_directory(
    input_dicom_path=input_path,
    output_dir=output_parent_dir,
    fill="contrast",
    # When True, the bounding boxes of the redacted regions are also saved
    # to .json files in the output dir.
    save_bboxes=True,
)
time2 = datetime.datetime.now()

In [None]:
# Total elapsed time of the local redaction run, in whole microseconds.
# `timedelta.microseconds` alone is only the sub-second component (0-999999),
# so it drops every full second (and day) of the run; dividing by a
# one-microsecond timedelta yields the complete duration instead.
(time2 - time1) // datetime.timedelta(microseconds=1)

In [None]:
# Elapsed time of the local redaction run, in seconds (float).
# total_seconds() also accounts for the `days` component of the timedelta,
# which the manual seconds/microseconds arithmetic silently dropped.
time_batch = (time2 - time1).total_seconds()

In [None]:
import os
import requests
import base64

def process_dicom_files(
    input_folder,
    output_folder,
    url="https://ai.demo.datadetect.com/pii_dicom/process-dicom-image",
    timeout=None,
):
    """
    Redact every DICOM file under a folder tree through the HTTP API.

    Walks ``input_folder`` recursively, uploads each DICOM file to ``url``
    one at a time, decodes the base64 ``redacted_instance`` field of the JSON
    response and writes it to ``output_folder`` under the same relative path.
    A failure on one file is printed and does not stop the batch.

    Args:
        input_folder (str): Path to the input folder containing DICOM files.
        output_folder (str): Path to the output folder to save redacted DICOM
            files. Created if it does not exist.
        url (str): Endpoint that receives each file as a multipart upload in
            the ``file`` field. Defaults to the demo endpoint.
        timeout (float | tuple | None): Passed straight to ``requests.post``.
            The default ``None`` waits indefinitely (the previous behaviour);
            pass a value to keep one stalled request from blocking the batch.

    Returns:
        None. Progress and errors are reported via ``print``.
    """
    # Matched against the lower-cased file name, so upper/mixed-case
    # extensions such as '.DCM' are covered as well.
    dicom_suffixes = ('.dcm', '.dicom')

    os.makedirs(output_folder, exist_ok=True)

    for root, _, filenames in os.walk(input_folder):
        for filename in filenames:
            if not filename.lower().endswith(dicom_suffixes):
                continue

            input_filepath = os.path.join(root, filename)
            # Mirror the input tree: keep the path relative to input_folder.
            relative_path = os.path.relpath(input_filepath, input_folder)
            output_filepath = os.path.join(output_folder, relative_path)

            # Create the output subdirectories if they don't exist
            os.makedirs(os.path.dirname(output_filepath), exist_ok=True)

            try:
                with open(input_filepath, 'rb') as f:
                    r = requests.post(url, files={'file': f}, timeout=timeout)
                    r.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
                    out = r.json()

                decoded_dicom = base64.b64decode(out['redacted_instance'])

                with open(output_filepath, 'wb') as f:
                    f.write(decoded_dicom)

                print(f"Processed and saved: {output_filepath}")

            except requests.exceptions.RequestException as e:
                print(f"Error processing {input_filepath}: {e}")
            except (KeyError, ValueError, TypeError) as e:
                print(f"Error processing {input_filepath}: Invalid API response: {e}")
            except Exception as e:
                # Deliberate best-effort: one bad file must not abort the batch.
                print(f"An unexpected error occurred while processing {input_filepath}: {e}")

# Timed run of the HTTP-based pipeline over the local "dicom" folder.
input_folder = "dicom"  # source folder; point this at your own data
output_folder = "output/dicon_online/"  # destination folder; adjust as needed
time3 = datetime.datetime.now()
process_dicom_files(input_folder=input_folder, output_folder=output_folder)
time4 = datetime.datetime.now()

In [None]:
# Elapsed time of the first API batch run, in seconds (float).
# total_seconds() also accounts for the `days` component of the timedelta,
# which the manual seconds/microseconds arithmetic silently dropped.
time_batch = (time4 - time3).total_seconds()

In [None]:
# Display the most recently computed elapsed time (in seconds) as the cell output.
time_batch

In [None]:
# Display the dimensions of the pixel array of the DICOM instance loaded earlier.
dicom_instance.pixel_array.shape

In [None]:
# Render the pixel data of the loaded DICOM instance as an image on a
# figure created with figsize=(10, 10).
plt.figure(figsize=(10,10))
plt.imshow(dicom_instance.pixel_array)

In [None]:
# Second timed run of the HTTP-based pipeline, this time over "dicom_modified".
input_folder = "dicom_modified"  # source folder; point this at your own data
output_folder = "output/dicon_online2/"  # destination folder; adjust as needed
time5 = datetime.datetime.now()
process_dicom_files(input_folder=input_folder, output_folder=output_folder)
time6 = datetime.datetime.now()

In [None]:
# Elapsed time of the second API batch run, in seconds (float).
# total_seconds() also accounts for the `days` component of the timedelta,
# which the manual seconds/microseconds arithmetic silently dropped.
time_batch = (time6 - time5).total_seconds()
print(f"Time taken for batch processing: {time_batch} seconds")


Time taken for batch processing: 5845.010997 seconds