In [1]:
import matplotlib.pyplot as plt
from pydicom import dcmread
import pandas as pd
import json
import os
from pathlib import Path

# Catalogar dados

## Funções Úteis

### Carregar e salvar JSON e pré-processar caminhos

In [2]:
def preprocessing_path(path):
    """Return the leading component of a "/"-separated path string.

    Everything from the first "/" onwards is discarded; a string that
    contains no "/" is returned unchanged.
    """
    first_component, _, _ = path.partition("/")
    return first_component

def load_json(object_name):
    """Load ``<object_name>.json`` from the ``metadata_csv_and_dicom`` folder.

    The file is looked up in ``metadata/metadata_csv_and_dicom`` under the
    parent of the current working directory, which is the same location the
    relative path used by ``save_json`` points to.

    Args:
        object_name: File name without the ``.json`` extension.

    Returns:
        The decoded JSON content, or ``None`` when the file does not exist
        or does not contain valid JSON.

    Raises:
        FileNotFoundError: If the file is missing and the empty placeholder
            cannot be created because its folder does not exist either.
    """
    # Build the location with pathlib; splitting the absolute path on "\\"
    # only produced a usable path on Windows.
    path = Path.cwd().parent / "metadata" / "metadata_csv_and_dicom" / f"{object_name}.json"
    try:
        # Read as UTF-8 to match how save_json writes the files
        # (encoding='utf-8' together with ensure_ascii=False).
        with open(path, 'r', encoding='utf-8') as json_file:
            return json.load(json_file)
    except json.decoder.JSONDecodeError:
        return None
    except FileNotFoundError:
        # Preserve the existing behaviour of leaving an empty placeholder
        # file behind when the requested file does not exist yet.
        path.touch()
        return None
        
def save_json(object_name, list_metadata):
    """Write ``list_metadata`` as UTF-8 JSON to the metadata folder.

    The output goes to ``../metadata/metadata_csv_and_dicom/<object_name>.json``
    relative to the current working directory, replacing any existing file.
    Non-ASCII characters are written as-is and the output is indented by
    three spaces.
    """
    destination = Path(f"../metadata/metadata_csv_and_dicom/{object_name}.json")
    dump_options = {"ensure_ascii": False, "indent": 3}
    with destination.open(mode='w', encoding='utf-8') as out_stream:
        json.dump(list_metadata, out_stream, **dump_options)

### Funções construtoras

In [3]:
def get_dicom_meta(dicom_file):
    """Collect the metadata of a DICOM dataset into a plain dictionary.

    Every data element obtained by iterating over ``dicom_file`` is visited.
    Elements whose value equals the empty string, and elements described as
    "Pixel Data" / "Pixel Array", are skipped so that the image itself never
    ends up in the metadata.

    Args:
        dicom_file: Iterable of data elements exposing ``tag``, ``value`` and
            ``description()``; in this notebook, the object returned by
            ``dcmread``.

    Returns:
        dict: Maps ``"<description> <tag>"`` (description lower-cased, spaces
        replaced by underscores) to the element value. ``bytes`` values are
        decoded to ``str``; person names are flattened to a single string.
    """
    dictionary = {}

    for data_element in dicom_file:
        description = data_element.description()
        if data_element.value == "" or description in ["Pixel Array", "Pixel Data"]:
            continue

        tag = data_element.tag
        tag_name = description.replace(" ", "_").lower()

        if tag_name in ["patient's_name", "referring_physician's_name"]:
            # NOTE(review): assumes `.components` holds the pieces of a single
            # name that belong together with "^" - confirm against the pydicom
            # version in use.
            value = "^".join(data_element.value.components)
        else:
            value = data_element.value

        if isinstance(value, bytes):
            # Binary values are not guaranteed to be valid UTF-8. Escape the
            # undecodable bytes instead of raising UnicodeDecodeError, so one
            # odd element cannot abort the whole cataloguing run.
            value = value.decode("utf-8", errors="backslashreplace")

        dictionary[f"{tag_name} {tag}"] = value

    return dictionary

def study_factory(study_name, metadata_csv, metadata_dicom_files):
    """Bundle a study's name, CSV metadata and DICOM metadata into one dict.

    The arguments are stored as given (no copies are made) under the keys
    ``study_name``, ``metadata_csv`` and ``metadata_dicom_files``.
    """
    study = dict(study_name=study_name)
    study['metadata_csv'] = metadata_csv
    study['metadata_dicom_files'] = metadata_dicom_files
    return study

### Obter metadados do CSV e dos arquivos DICOM

In [4]:
def get_study_metadata(paths, metadata_csv, indice,
                       dataset_root="D:/mathe/Documents/BancoDados_IC/CBIS-DDSM") -> dict:
    """Gather the CSV and DICOM metadata that belong to one study.

    Args:
        paths: Mapping from image kind to the study folder name below
            ``dataset_root``. The ``"original"`` entry is required because it
            is also used as the study name; the other kinds used in this
            notebook are ``"croped"`` and ``"roi"``.
        metadata_csv: DataFrame with the case description; only its first
            ten columns are read.
        indice: Index label of the row of ``metadata_csv`` to read.
        dataset_root: Folder that contains the study folders. Defaults to the
            location previously hard-coded in this function.

    Returns:
        dict: The record built by ``study_factory``: the study name, the
        string/integer CSV values of the row plus one ``<kind>_image_path``
        entry per image kind, and the metadata of every DICOM file found,
        grouped by image kind.

    Raises:
        FileNotFoundError: If no DICOM file is found for one of the folders.
    """
    # Collect the CSV metadata of the selected row (text and integers only).
    dic_meta_in_csv = {}

    for col in metadata_csv.iloc[:, :10].columns.tolist():
        value = metadata_csv[col][indice]
        if isinstance(value, (str, int)):
            dic_meta_in_csv[col] = value
        elif pd.api.types.is_integer(value):
            # pandas returns NumPy integers (e.g. numpy.int64) for integer
            # columns. They are not `int` instances and json cannot serialise
            # them, so convert explicitly instead of silently dropping them.
            dic_meta_in_csv[col] = int(value)

    # Collect the metadata stored inside the DICOM files.
    metadata_dicom_files = {"original": [], "croped": [], "roi": []}

    for key, path in paths.items():
        directory = Path(f"{dataset_root}/{path}/")
        # Sorted so the resulting catalogue does not depend on the order in
        # which the file system happens to list the files.
        paths_dicom_images = sorted(directory.rglob("*.dcm*"))
        if not paths_dicom_images:
            raise FileNotFoundError(
                f"No DICOM files found for '{key}' under {directory}"
            )

        # Record the folder (with forward slashes) that holds the images.
        dic_meta_in_csv[f'{key}_image_path'] = paths_dicom_images[0].parent.as_posix()

        file_metadata = metadata_dicom_files.setdefault(key, [])
        for path_dicom in paths_dicom_images:
            file_metadata.append(get_dicom_meta(dcmread(path_dicom)))

    return study_factory(paths['original'], dic_meta_in_csv, metadata_dicom_files)

## Salvar metadados em arquivos JSON

In [5]:
# Case-description CSV files (without extension) to catalogue; one JSON file
# with the same base name is written for each of them.
metadata_files = ['calc_case_description_test_set',
             'mass_case_description_train_set',
             'calc_case_description_train_set',
             'mass_case_description_test_set']

# Positions of the columns holding the three image paths of every CSV row.
IMAGE_PATH_COLUMNS = {"original": 11, "croped": 12, "roi": 13}

for current_meta in metadata_files:
    df = pd.read_csv(f"../metadata//{current_meta}.csv")

    # The first ten columns carry the case description; the slice does not
    # depend on the row, so take it once per file.
    metadata = df.iloc[:, :10]

    studies = []
    for i in range(df.shape[0]):
        # Read each cell by position with a single .iloc lookup: indexing the
        # row Series with an integer key via [] relies on a deprecated
        # positional fallback. Only the leading folder of each path is kept.
        all_image_path = {
            key: preprocessing_path(df.iloc[i, column])
            for key, column in IMAGE_PATH_COLUMNS.items()
        }

        studies.append(get_study_metadata(all_image_path, metadata, i))
    save_json(current_meta, studies)

In [6]:
# Scratch cell: load a single DICOM file from one study for manual inspection.
# The three folders below are hard-coded absolute paths on the author's machine.
path_original = "D:/mathe/Documents/BancoDados_IC/CBIS-DDSM/Mass-Test_P_00016_LEFT_CC/10-04-2016-DDSM-NA-30104/1.000000-full mammogram images-14172"
path_croped = "D:/mathe/Documents/BancoDados_IC/CBIS-DDSM/Mass-Test_P_00016_LEFT_CC_1/10-04-2016-DDSM-NA-09887/1.000000-cropped images-26184"
# NOTE(review): path_roi is set to the same folder as path_croped - confirm this is intended.
path_roi = "D:/mathe/Documents/BancoDados_IC/CBIS-DDSM/Mass-Test_P_00016_LEFT_CC_1/10-04-2016-DDSM-NA-09887/1.000000-cropped images-26184"
# Every file below the cropped-images folder whose name matches "*.dcm*".
files = list(Path(path_croped).rglob("*.dcm*"))
# Reads the second file found; raises IndexError if fewer than two files match.
dicom_file = dcmread(files[1])
# Disabled: display the dataset and plot its pixel array.
#dicom_file
#plt.imshow(dicom_file.pixel_array, cmap=plt.cm.bone)

# Disabled: print every bytes-valued element and whether it is the pixel data.
#for data_element in dicom_file:
    #if isinstance(data_element.value, bytes):
        #print(data_element.description() == "Pixel Data")
        #print(str(data_element.value))

b'CBIS-DDSM '
b'43372602'
b'\\\xda\xa2\xdbU\xddd\xde\x7f\xdep\xdd\x97\xdc)\xdc\xff\xda\xfb\xd8\xc5\xd8\xb9\xd9\x83\xd9\x83\xd9\x0b\xdaA\xda{\xdc\x0e\xdc&\xda\xfb\xd8\x9e\xd9A\xda\xf0\xd9&\xda5\xdbD\xdc\xdd\xdd"\xdf\x7f\xde\x13\xde\xaa\xdf\x0b\xe1\x92\xe1w\xe1w\xe15\xe2D\xe3\x06\xe6f\xe7\xee\xe7K\xe7K\xe7\x91\xe8\xff\xe8\xd8\xe9\xe2\xe8\xc4\xe6c\xe5\xa5\xe4D\xe3\xdb\xe40\xe7\xbc\xe9S\xeb_\xea\xc7\xe8\x15\xe7\x1d\xe4w\xe1\x9e\xe0\xfb\xdf\x92\xe1\xe7\xe3K\xe7_\xeaS\xebW\xed\x18\xf0\xd6\xf0\xa0\xf0\x81\xee8\xeb\xa8\xe6\xb1\xe3H\xe5\t\xe8v\xe8\xff\xe8S\xeb\xc0\xeb\xc0\xeb\x02\xeb\xe2\xe8Z\xe8z\xea\x8a\xeb\xcc\xea\xbc\xe9\xbc\xe9\x0e\xea\xff\xe8\xd8\xe9_\xeaz\xea\xc0\xebf\xee\xc7\xef\xac\xef\xb8\xee\x9c\xeer\xed~\xec\x8a\xebW\xedu\xef\xac\xef\x85\xf0\xaf\xf1\x01\xf2+\xf3C\xf1!\xed\xdb\xeb,\xec\xc0\xebH\xecD\xeaK\xe7K\xe7\xd3\xe7\xa8\xe6\xb4\xe5-\xe5\xcf\xe5!\xe6\xb4\xe5\x0e\xe3w\xe1\x9e\xe0\xb9\xe02\xe0\xec\xde.\xdeI\xdeI\xdeU\xdd:\xdd=\xdf5\xe2\xb1\xe3<\xe6$\xe8W\xe6\xb1\xe3\xa1\xe2k\xe2\x8