### DICOM Staging from Raw Location to Staging Location

#### Overview
This notebook handles the staging process of DICOM files. The primary objective is to copy DICOM files from the **raw** source location to a **staging** area, where they can be further processed or validated before final usage.

##### Overall Process
1. **Raw Location Input**: DICOM files are located in a specified raw source folder.
2. **Staging Location Setup**: A new staging folder is prepared to receive the DICOM files.
3. **File Copying**: DICOM files are copied from the raw location to the staging folder.
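4. **Directory Structure Reorganization**: Regardless of the folder layout in the raw location, files are placed in the staging location under `PatientID/StudyInstanceUID/SeriesInstanceUID/`, using the values read from each file's DICOM header.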
5. **Logging and Reporting**: All actions, including files copied and any errors encountered, are logged for transparency.

  
#### Outputs
- A staging directory populated with the DICOM files from the raw location, organized in the logical directory structure `PatientID/StudyInstanceUID/SeriesInstanceUID/`.

In [None]:
import os
import shutil
import pydicom
import logging
from pathlib import Path

class DicomProcessor:
    """Copy DICOM files from a raw directory tree into a staging tree.

    Every ``.dcm`` file found under ``base_input_dir`` is copied (the source
    file is left in place) to
    ``base_output_dir/PatientID/StudyInstanceUID/SeriesInstanceUID/``, where
    the three folder names are taken from the file's DICOM header.
    """

    # Characters that must never end up inside a single directory name:
    # path separators would create extra nesting (the 'N/A' placeholder
    # contains one) and NUL is rejected by the operating system.
    _UNSAFE_PATH_CHARS = ('/', '\\', '\x00')

    def __init__(self, base_input_dir, base_output_dir):
        """Remember the raw (input) and staging (output) root directories.

        Args:
            base_input_dir: Root folder that is searched for DICOM files.
            base_output_dir: Root folder under which staged copies are written.
        """
        self.base_input_dir = base_input_dir
        self.base_output_dir = base_output_dir

    @classmethod
    def _safe_path_component(cls, value, fallback='UNKNOWN'):
        """Turn a header value into exactly one safe directory name."""
        text = '' if value is None else str(value).strip()
        for char in cls._UNSAFE_PATH_CHARS:
            text = text.replace(char, '_')
        # Empty names would drop a directory level; '.' and '..' would
        # redirect the path (possibly outside the staging root).
        if text in ('', '.', '..'):
            return fallback
        return text

    def parse_dicom_header(self, dicom_file):
        """Read the identifying header fields of one DICOM file.

        Args:
            dicom_file: Path of the file to read.

        Returns:
            A dict with the keys ``PatientID``, ``StudyInstanceUID``,
            ``StudyID`` and ``SeriesInstanceUID`` (``'N/A'`` for any field the
            file does not contain), or ``None`` when the file could not be
            read. Failures are logged, never raised.
        """
        try:
            # Only header fields are used, so do not load the pixel data.
            dicom_data = pydicom.dcmread(dicom_file, stop_before_pixels=True)
            subdirectory_metadata = {
                field: dicom_data.get(field, 'N/A')
                for field in ('PatientID', 'StudyInstanceUID', 'StudyID', 'SeriesInstanceUID')
            }
        except pydicom.errors.InvalidDicomError as e:
            logging.error(f"Invalid DICOM file {dicom_file}: {e}")
            return None
        except Exception as e:
            # Boundary handler: one unreadable file must not stop the run.
            logging.error(f"Unexpected error while reading DICOM file {dicom_file}: {e}")
            return None

        logging.info(f"Successfully parsed DICOM file: {dicom_file}")
        return subdirectory_metadata

    def create_folder_structure(self, subdirectory_metadata):
        """Create ``PatientID/StudyInstanceUID/SeriesInstanceUID`` under the staging root.

        Each value is reduced to a single directory name first, so a value
        such as ``'N/A'`` or ``'../x'`` cannot add or remove path levels.

        Args:
            subdirectory_metadata: Dict containing the keys ``PatientID``,
                ``StudyInstanceUID`` and ``SeriesInstanceUID``.

        Returns:
            The path of the (created or already existing) output folder.

        Raises:
            KeyError: If one of the three keys is missing.
            OSError: If the folder cannot be created.
        """
        components = [
            self._safe_path_component(subdirectory_metadata[key])
            for key in ('PatientID', 'StudyInstanceUID', 'SeriesInstanceUID')
        ]
        output_folder = os.path.join(self.base_output_dir, *components)
        os.makedirs(output_folder, exist_ok=True)
        logging.info(f"Created output folder: {output_folder}")
        return output_folder

    def move_dicom_file(self, dicom_file, output_folder):
        """Copy one DICOM file into ``output_folder`` without overwriting.

        Despite the method name the source file is copied, not moved. If the
        target name is already taken, ``_1``, ``_2``, ... is inserted before
        the extension until a free name is found.

        Args:
            dicom_file: Path of the source file.
            output_folder: Existing directory that receives the copy.

        Returns:
            The path the file was copied to.

        Raises:
            OSError: If the copy fails.
        """
        base_name = os.path.basename(dicom_file)
        name, ext = os.path.splitext(base_name)
        dest_file = os.path.join(output_folder, base_name)

        counter = 1
        while os.path.exists(dest_file):
            dest_file = os.path.join(output_folder, f"{name}_{counter}{ext}")
            counter += 1

        shutil.copy(dicom_file, dest_file)
        logging.info(f"Copied {dicom_file} to {dest_file}")
        return dest_file

    def process_dicom_files(self, dicom_folder):
        """Stage every ``.dcm`` file that lies directly inside ``dicom_folder``.

        The extension check is case-insensitive. Other regular files are
        reported in one log line; sub-directories are ignored here. A file
        whose header cannot be parsed, or whose copy fails, is logged and
        skipped so the remaining files are still staged.

        Args:
            dicom_folder: Directory to scan (not recursive).
        """
        dicom_files = []
        non_dicom_files = []
        # Sorted so that processing order, and therefore the suffix chosen on
        # a name collision, does not depend on the file system's listing order.
        for entry in sorted(os.listdir(dicom_folder)):
            path = os.path.join(dicom_folder, entry)
            if not os.path.isfile(path):
                continue
            if entry.lower().endswith('.dcm'):
                dicom_files.append(path)
            else:
                non_dicom_files.append(entry)

        if non_dicom_files:
            logging.info(f"Skipping non-DICOM files: {', '.join(non_dicom_files)}")

        for dicom_file in dicom_files:
            subdirectory_metadata = self.parse_dicom_header(dicom_file)
            if not subdirectory_metadata:
                continue
            try:
                output_folder = self.create_folder_structure(subdirectory_metadata)
                self.move_dicom_file(dicom_file, output_folder)
            except OSError as e:
                logging.error(f"Failed to stage {dicom_file}: {e}")

    def process_subfolders(self, base_dir, visited_dirs):
        """Process ``base_dir`` and every directory below it exactly once.

        ``os.walk`` already descends into sub-directories, so no manual
        recursion is needed. The staging root itself is never entered, which
        keeps already staged copies from being staged again when the staging
        folder lies inside the input tree.

        Args:
            base_dir: Directory at which the walk starts.
            visited_dirs: Set of directory paths to skip; every directory
                processed here is added to it.
        """
        staging_root = os.path.abspath(self.base_output_dir)
        for root, dirs, _files in os.walk(base_dir):
            # Editing ``dirs`` in place controls where os.walk goes next.
            dirs[:] = sorted(
                d for d in dirs
                if os.path.abspath(os.path.join(root, d)) != staging_root
            )

            if root in visited_dirs:
                continue
            visited_dirs.add(root)

            logging.info(f"Processing folder: {root}")
            self.process_dicom_files(root)

    def process_all_folders(self):
        """Process the base input directory and all of its sub-directories."""
        if not os.path.isdir(self.base_input_dir):
            # os.walk yields nothing for a missing directory; say so explicitly.
            logging.error(f"Input directory does not exist: {self.base_input_dir}")
            return
        visited_dirs = set()  # directories that have already been processed
        self.process_subfolders(self.base_input_dir, visited_dirs)

def main():
    """Stage the bundled LIDC sample data set next to this script.

    Input is ``qure_ai/lidc_small_dset`` and output is
    ``qure_ai/lidc_small_dset_staging``, both resolved relative to the
    directory that contains this file.
    """
    try:
        base_dir = Path(__file__).resolve().parent
    except NameError:
        # ``__file__`` is not defined when this code runs as a notebook cell;
        # fall back to the current working directory in that case.
        base_dir = Path.cwd()

    base_input_dir = base_dir / "qure_ai/lidc_small_dset"
    base_output_dir = base_dir / "qure_ai/lidc_small_dset_staging"

    # Create the processor and stage every folder below the input directory.
    dicom_processor = DicomProcessor(base_input_dir, base_output_dir)
    dicom_processor.process_all_folders()

if __name__ == "__main__":
    # Send every record of level INFO and above both to the file
    # 'error_log.log' and to the console before staging starts.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler('error_log.log'),
            logging.StreamHandler(),
        ],
    )
    main()
