In [1]:
import os
import sys

import pandas as pd
import numpy as np
from skimage.feature import graycomatrix, graycoprops

# Make the project root (two directory levels above the notebook's working
# directory) importable so that `src.*` can be resolved.
# Built with os.path instead of splitting on a literal backslash, so it works
# on every OS and always yields an absolute path.
project_root = os.path.abspath(os.path.join(os.curdir, os.pardir, os.pardir))
sys.path.insert(0, project_root)

from pydicom import dcmread

from src.utils.utils import *

In [2]:
# Load the per-study metadata and build three aligned lists: the preprocessed
# image crops, the study names and the pathology labels.
# NOTE(review): `load_json`, `Path` and `cv2` are not imported explicitly in this
# notebook; presumably they come from `from src.utils.utils import *` - confirm.
path_json = "../../outputs/mamografia/extracted_metadata"
name_json = "extracted_metadata_CBIS-DDSM"

list_metadata = load_json(name_json, path_json)

# Scale factor applied to the central crop on both axes.
RESIZE_FACTOR = 0.5

images_resized = []  # uint8 crops, one per study
image_names = []     # study names, same order as images_resized
labels = []          # pathology labels, same order as images_resized

for metadata in list_metadata:
    image_name = metadata['study_name']
    metadata_csv = metadata['metadata_csv']
    label = metadata_csv['pathology']

    # Take the first DICOM file found under the study directory. Fail with an
    # explicit message (instead of a bare IndexError) when there is none.
    directory = Path(metadata_csv['original_image_path'])
    path_dicom_file = next(directory.rglob("*.dcm*"), None)
    if path_dicom_file is None:
        raise FileNotFoundError(f"No DICOM file found under {directory}")
    dicom_file = dcmread(path_dicom_file)

    # Pixel matrix of the original image
    image = dicom_file.pixel_array

    # Bounds of the central third of the image on each axis
    height, width = image.shape
    ymin, ymax, xmin, xmax = height//3, height*2//3, width//3, width*2//3

    # Rescale by the maximum of the WHOLE image (not of the crop) into 0-255.
    # Cropping before the float conversion gives the same values as cropping
    # afterwards, while avoiding a full-size float copy of every image.
    max_value = image.max()
    region = image[ymin:ymax, xmin:xmax]
    if max_value == 0:
        # All-zero image: dividing by the maximum would produce NaNs, whose
        # cast to uint8 is undefined. Keep the crop black instead.
        crop = np.zeros(region.shape, dtype=np.uint8)
    else:
        crop = ((region / max_value) * 255).astype(np.uint8)

    image_resized = cv2.resize(crop, (0,0), fx=RESIZE_FACTOR, fy=RESIZE_FACTOR)

    # Append only after the image was processed successfully, so the three
    # lists can never get out of step if a study fails to load.
    images_resized.append(image_resized)
    image_names.append(image_name)
    labels.append(label)

images_resized[0].shape

(770, 502)

In [3]:
# GLCM texture properties extracted from every image; each property yields one
# value per angle.
properties = ['dissimilarity', 'correlation', 'homogeneity', 'contrast', 'ASM', 'energy']

# Single source of truth for the GLCM angles: column-name suffix -> angle in
# radians. Both the graycomatrix argument and the column names are derived
# from this mapping, so feature order and column order cannot drift apart.
angle_by_label = {'0': 0, '90': np.pi/2}
angles = list(angle_by_label)

glcm_features = []  # one flat feature vector per image

for img in images_resized:
    glcm = graycomatrix(img, distances=[5], angles=list(angle_by_label.values()),
                        levels=256, symmetric=True, normed=True)
    # graycoprops returns one row per distance and one column per angle; a
    # single distance is used, so row 0 holds the value for each angle.
    # Resulting order: property-major, angle-minor (matches `columns` below).
    feature = [value for name in properties for value in graycoprops(glcm, name)[0]]
    glcm_features.append(feature)

# Column names in the same property-major, angle-minor order as the features.
columns = [name + "_" + ang for name in properties for ang in angles]

assert all(len(vector) == len(columns) for vector in glcm_features), \
    "feature vectors and column names are out of sync"

In [7]:
# Assemble the feature table (one row per image), attach the pathology label
# with BENIGN_WITHOUT_CALLBACK folded into BENIGN, and persist it as CSV.
glcm_df = (
    pd.DataFrame(glcm_features, columns=columns)
    .assign(pathology=labels)
    .assign(pathology=lambda frame: frame['pathology'].replace('BENIGN_WITHOUT_CALLBACK', 'BENIGN'))
)
glcm_df.to_csv("../../outputs/mamografia/matriz_features_glcm.csv", index=False)

glcm_df.head()

Unnamed: 0,dissimilarity_0,dissimilarity_90,correlation_0,correlation_90,homogeneity_0,homogeneity_90,contrast_0,contrast_90,ASM_0,ASM_90,energy_0,energy_90,pathology
0,1.558669,1.330008,0.991838,0.99325,0.700942,0.718123,15.089312,12.688345,0.374834,0.374206,0.612237,0.611724,BENIGN
1,2.675529,2.502785,0.994682,0.995176,0.43801,0.454717,16.133688,14.702494,0.065477,0.067049,0.255886,0.258937,BENIGN
2,1.631688,1.372944,0.992358,0.994149,0.687811,0.705508,12.983167,10.130144,0.357444,0.357212,0.597867,0.597672,BENIGN
3,1.631688,1.372944,0.992358,0.994149,0.687811,0.705508,12.983167,10.130144,0.357444,0.357212,0.597867,0.597672,BENIGN
4,2.473641,2.381861,0.994516,0.994735,0.474037,0.486832,14.635021,14.176513,0.090544,0.092386,0.300906,0.303951,BENIGN


In [6]:
# Sanity check on the labels actually stored in glcm_df (and written to the
# CSV). The column is inspected directly: re-applying the replace here would
# show merged labels even if the stored column had never been merged.
glcm_df['pathology'].unique()

array(['BENIGN', 'MALIGNANT'], dtype=object)