In [1]:
import json
import os
import sys
from pathlib import Path

# Put the directory two levels above the working directory on the import path
# so the `src` package imported below can be resolved. Path.parents is used
# instead of splitting on "\\" so this also works where the path separator
# is "/" (splitting on "\\" there produced an empty string).
sys.path.insert(0, str(Path.cwd().parents[1]))

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pydicom import dcmread

# NOTE(review): star import kept on purpose. Helpers used later in this notebook
# (e.g. get_dicom_meta, study_factory, preprocessing_path, save_json) are not
# defined here and presumably come from this module - confirm, then import
# them by name.
from src.utils.utils import *

# Catalogar dados

## DDSM

### Pegar metadados em CSV e DICOM

In [2]:
def _to_json_value(value):
    """Convert one DataFrame cell into a value the ``json`` module can write.

    NumPy integer, floating and boolean scalars become the matching built-in
    type, missing values become the string "NaN", and anything else is
    returned unchanged.
    """
    if isinstance(value, np.integer):
        return int(value)
    if pd.isna(value):
        return "NaN"
    if isinstance(value, np.floating):
        return float(value)
    if isinstance(value, np.bool_):
        return bool(value)
    return value


def get_study_metadata(
    paths: dict,
    metadata_csv: pd.DataFrame,
    indice: int,
    base_dir: str = "D:/mathe/Documents/BancoDados_IC/CBIS-DDSM",
) -> dict:
    """Build the study record for one row of a case-description table.

    Args:
        paths: Maps an image kind (the caller passes "original" and "cropped")
            to a folder path relative to ``base_dir``. ``paths["original"]``
            is also passed to ``study_factory`` as its first argument.
        metadata_csv: Table with the per-case metadata; only its first 11
            columns are read.
        indice: Index label of the row to read from ``metadata_csv``.
        base_dir: Root folder that the relative paths in ``paths`` are joined
            to. Defaults to the location that was previously hard-coded.

    Returns:
        Whatever ``study_factory`` returns for the collected CSV metadata and
        DICOM metadata.

    Raises:
        FileNotFoundError: If a folder in ``paths`` contains no ``*.dcm*`` file.
        KeyError: If ``paths`` has no "original" entry.
    """
    # Metadata taken from the DataFrame row (first 11 columns only)
    dic_meta_in_csv = {
        col: _to_json_value(metadata_csv[col][indice])
        for col in metadata_csv.columns[:11]
    }

    # Metadata taken from the DICOM objects, grouped by image kind
    metadata_dicom_files = {"original": [], "cropped": []}

    for key, path in paths.items():
        directory = Path(f"{base_dir}/{path}/")
        # Sorted so the result does not depend on filesystem listing order
        paths_dicom_images = sorted(directory.rglob("*.dcm*"))
        if not paths_dicom_images:
            raise FileNotFoundError(
                f"No DICOM files (*.dcm*) found for '{key}' under {directory}"
            )

        # Store the folder of the first image (file name dropped), always
        # written with forward slashes
        dic_meta_in_csv[f'{key}_image_path'] = paths_dicom_images[0].parent.as_posix()

        for path_dicom in paths_dicom_images:
            dicom_file = dcmread(path_dicom)
            # NOTE(review): meaning of the second argument (False) is defined
            # by get_dicom_meta, which is not visible here - confirm.
            metadata_dicom_files.setdefault(key, []).append(
                get_dicom_meta(dicom_file, False)
            )

    return study_factory(paths['original'], dic_meta_in_csv, metadata_dicom_files)

### Salvar metadados em arquivos JSON

In [4]:
# Case-description tables to process (file names without the .csv suffix)
metadata_files = ['calc_case_description_test_set',
                  'mass_case_description_train_set',
                  'calc_case_description_train_set',
                  'mass_case_description_test_set']

for current_meta in metadata_files:
    df = pd.read_csv(f"../../metadata/DDSM/raw_metadata/{current_meta}.csv")

    # The first 11 columns are handed to get_study_metadata as the CSV
    # metadata; the slice does not depend on the row, so take it once per file.
    metadata = df.iloc[:, :11]

    studies = []
    for i in range(len(df)):
        # Columns 11 and 12 are read as the original and cropped image paths.
        # A single positional lookup is used because `df.iloc[i][11]` indexes
        # a label-indexed Series with an integer key, which pandas deprecates.
        original_image_path = preprocessing_path(df.iloc[i, 11])
        cropped_images_path = preprocessing_path(df.iloc[i, 12])

        all_image_path = {"original": original_image_path, "cropped": cropped_images_path}

        studies.append(get_study_metadata(all_image_path, metadata, i))

    # One JSON file per case-description table, named after the table
    save_json(current_meta, studies, "../../metadata/DDSM/metadata_csv_and_dicom")

## CMMD

In [6]:
path_metadata = "../../metadata/CMMD/raw_metadata/CMMD_clinicaldata_revision.xlsx"
# NOTE(review): `path` is kept as an alias only because the original cell
# defined it; it is not used in this cell - confirm and remove if unused.
path_cmmd = path = "D:/mathe/Documents/BancoDados_IC/CMMD"

# Missing cells are replaced by the string "NaN" up front, so no per-cell
# missing-value check is needed further down.
df = pd.read_excel(path_metadata)
df = df.fillna("NaN")

studies = []

left_or_right_breast = {'L': 'Left', 'R': 'Right'}
abnormality = {'calcification': 'Calc', 'mass': 'Mass', 'both': 'Both'}

for i in range(len(df)):
    # Study name: "<abnormality code>_<column 0>_<breast side>", built from
    # columns 4, 0 and 1 of the current row
    study_name = "_".join([
        abnormality[df.iloc[i, 4]],
        df.iloc[i, 0],
        left_or_right_breast[df.iloc[i, 1]],
    ])

    # Row metadata; NumPy integers are converted to built-in ints
    metadata_csv = {}
    for column in df.columns:
        value = df.loc[i, column]
        metadata_csv[column] = int(value) if isinstance(value, np.integer) else value

    # DICOM files of the exam, sorted so the order is reproducible.
    # NOTE(review): the folder depends only on ID1, so rows sharing an ID1
    # collect the same files regardless of breast side - confirm intended.
    directory = Path(f"{path_cmmd}/{df.loc[i, 'ID1']}/")
    paths_dicom_images = sorted(directory.rglob("*.dcm*"))

    # Metadata of every DICOM file found for this row
    metadata_dicom_files = []
    for path_dicom in paths_dicom_images:
        dicom_file = dcmread(path_dicom)
        metadata_dicom_files.append(get_dicom_meta(dicom_file, False))

    studies.append(study_factory(study_name, metadata_csv, metadata_dicom_files))

save_json("metadata_csv_and_dicom", studies, "../../metadata/CMMD")