In [1]:
import matplotlib.pyplot as plt
from pydicom import dcmread
import pandas as pd
import numpy as np
import json
import os
from pathlib import Path

# Catalogar dados

## Funções Úteis

### Carregar e salvar em JSON e pré-processar path

In [2]:
def preprocessing_path(path: str) -> str:
    """Return the first "/"-separated component of ``path``.

    Everything from the first "/" onwards is discarded; a string that
    contains no "/" is returned unchanged.
    """
    first_component, _, _ = path.partition("/")
    return first_component

def load_json(object_name: str) -> None | object:
    path = os.path.abspath("catalogar_dicom_imagens.ipynb")
    path = "/".join(path.split("\\")[:-2:])
    path = path + f"/metadata/metadata_csv_and_dicom/{object_name}.json"
    try:
        with open(path, 'r') as json_file:
            return json.load(json_file)  
    except json.decoder.JSONDecodeError:
        return None
    except FileNotFoundError:
        with open(path, 'w', encoding='utf-8') as json_file:
            return None
        
def save_json(object_name: str, list_metadata: list) -> None:
    """Serialise ``list_metadata`` to ``<object_name>.json`` in the catalogue folder.

    The target is ``../metadata/metadata_csv_and_dicom/`` relative to the
    current working directory; an existing file is overwritten. The JSON is
    written as UTF-8 with a 3-space indent and non-ASCII characters kept
    verbatim.
    """
    destination = f"../metadata/metadata_csv_and_dicom/{object_name}.json"
    dump_options = {"ensure_ascii": False, "indent": 3}
    with open(destination, mode='w', encoding='utf-8') as handle:
        json.dump(list_metadata, handle, **dump_options)

### construtoras

In [3]:
def get_dicom_meta(dicom_file: object) -> dict:
    """Collect the data elements of a DICOM dataset into a flat dictionary.

    Args:
        dicom_file: Iterable of data elements exposing ``name``, ``tag`` and
            ``value`` (e.g. the dataset returned by ``pydicom.dcmread``).

    Returns:
        Dict mapping ``"<name> <tag>"`` to the element value, where ``<name>``
        is the element name lower-cased with spaces replaced by underscores.
        Pixel data and elements whose value equals ``""`` are left out;
        ``bytes`` values are decoded as UTF-8.

    Raises:
        UnicodeDecodeError: If a ``bytes`` value is not valid UTF-8.
    """
    skipped_names = ("Pixel Array", "Pixel Data")
    person_name_keys = ("patient's_name", "referring_physician's_name")
    dictionary = {}

    for data_element in dicom_file:
        # Look the name up once through `name`; pydicom deprecated the
        # `description()` accessor in favour of this property.
        element_name = data_element.name

        # Test the name first so the pixel data value is never touched.
        if element_name in skipped_names or data_element.value == "":
            continue

        tag_name = element_name.replace(" ", "_").lower()

        if tag_name in person_name_keys:
            # Person-name values expose their parts through `components`.
            value = "^".join(data_element.value.components)
        else:
            value = data_element.value

        if isinstance(value, bytes):
            value = value.decode("utf-8")

        dictionary[f"{tag_name} {data_element.tag}"] = value

    return dictionary

def study_factory(study_name: str, metadata_csv: dict, metadata_dicom_files: list) -> dict:
    """Bundle a study's name, CSV metadata and DICOM metadata into one record.

    The arguments are stored as given (no copies) under the keys
    ``study_name``, ``metadata_csv`` and ``metadata_dicom_files``.
    """
    study = dict(
        study_name=study_name,
        metadata_csv=metadata_csv,
        metadata_dicom_files=metadata_dicom_files,
    )
    return study

### Pegar Metadados em csv e dicom

In [4]:
def get_study_metadata(
    paths: dict,
    metadata_csv: pd.DataFrame,
    indice: int,
    dataset_root: str = "D:/mathe/Documents/BancoDados_IC/CBIS-DDSM",
) -> dict:
    """Build the catalogue record of one study from its CSV row and DICOM files.

    Args:
        paths: Maps an image kind (``"original"``, ``"cropped"``, ...) to the
            study folder name, relative to ``dataset_root``. Must contain
            ``"original"``, which is used as the study name.
        metadata_csv: Case-description table; only its first 11 columns are
            catalogued.
        indice: Positional index of the row describing the study.
        dataset_root: Folder that contains the study folders. Defaults to
            the location that was previously hard-coded.

    Returns:
        The record built by ``study_factory``: the study name, the CSV
        metadata (plus one ``<kind>_image_path`` entry per image kind) and
        the DICOM metadata of every file found, grouped by image kind.

    Raises:
        FileNotFoundError: If no DICOM file is found for one of ``paths``.
    """
    # Metadata taken from the CSV row.
    dic_meta_in_csv = {}
    for position, col in enumerate(metadata_csv.columns[:11]):
        # .iat is purely positional, matching how the caller indexes rows,
        # and reads one cell without upcasting the whole row.
        value = metadata_csv.iat[indice, position]
        if pd.isna(value):
            dic_meta_in_csv[col] = "Nan"
        elif isinstance(value, np.generic):
            # NumPy scalars (int64, bool_, ...) are not all JSON serialisable.
            dic_meta_in_csv[col] = value.item()
        else:
            dic_meta_in_csv[col] = value

    # Metadata taken from the DICOM files.
    metadata_dicom_files = {"original": [], "cropped": []}
    root = Path(dataset_root)

    for key, path in paths.items():
        directory = root / path
        # Sorted so the stored folder and the file order do not depend on
        # the order in which the filesystem lists the files.
        paths_dicom_images = sorted(directory.rglob("*.dcm*"))
        if not paths_dicom_images:
            raise FileNotFoundError(f"No DICOM files found under {directory}")

        # Folder of the first image, written with forward slashes.
        dic_meta_in_csv[f'{key}_image_path'] = paths_dicom_images[0].parent.as_posix()

        # setdefault lets extra image kinds (e.g. "roi") be catalogued too.
        file_metadata = metadata_dicom_files.setdefault(key, [])
        for path_dicom in paths_dicom_images:
            file_metadata.append(get_dicom_meta(dcmread(path_dicom)))

    return study_factory(paths['original'], dic_meta_in_csv, metadata_dicom_files)

## Salvar metadados em arquivos JSON

In [5]:
# Case-description CSV files to catalogue (names without the ".csv" extension).
metadata_files = ['calc_case_description_test_set',
                  'mass_case_description_train_set',
                  'calc_case_description_train_set',
                  'mass_case_description_test_set']

# Column layout of the CSVs as used below: the first 11 columns are the case
# metadata, column 11 holds the original image path and column 12 the cropped
# image path. Column 13 (ROI image path) is currently not catalogued.
N_METADATA_COLUMNS = 11
ORIGINAL_PATH_COLUMN = 11
CROPPED_PATH_COLUMN = 12

for current_meta in metadata_files:
    df = pd.read_csv(f"../metadata//{current_meta}.csv")
    # Loop-invariant: the metadata columns are the same for every row.
    metadata = df.iloc[:, :N_METADATA_COLUMNS]

    studies = []
    for i in range(df.shape[0]):
        # Positional (row, column) lookup; `df.iloc[i][11]` relied on the
        # deprecated integer-position fallback of Series.__getitem__.
        original_image_path = preprocessing_path(df.iloc[i, ORIGINAL_PATH_COLUMN])
        cropped_images_path = preprocessing_path(df.iloc[i, CROPPED_PATH_COLUMN])

        all_image_path = {"original": original_image_path, "cropped": cropped_images_path}
        studies.append(get_study_metadata(all_image_path, metadata, i))

    # One JSON catalogue per CSV file, named after it.
    save_json(current_meta, studies)