In [1]:
import os
import sys

import pandas as pd
import numpy as np
from skimage.feature import graycomatrix, graycoprops

# Make the directory two levels above the current working directory importable,
# so that the `src.utils.utils` import below resolves.
# os.path.dirname is used instead of splitting on "\\" so the result is the same
# on Windows and also correct on POSIX (where splitting on "\\" produced "").
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(os.curdir))))

from pydicom import dcmread

from src.utils.utils import *

In [2]:
# Extract GLCM texture features from every image listed in the metadata JSON.
# Results of this cell: `glcm_features` (one flat feature list per image),
# `labels`, `image_names` and `columns` (one name per feature).

path_json = "../../outputs/mamografia/extracted_metadata"
name_json = "extracted_metadata_CBIS-DDSM"

# `load_json` is provided by the star import of src.utils.utils.
list_metadata = load_json(name_json, path_json)

# Never populated in this cell; the names are kept defined in case other cells
# reference them.
images_original = []
images_resized = []

image_names = []
labels = []
glcm_features = []  # each GLCM property contributes its values to the feature list

# GLCM parameters, defined once and used both for the computation and for the
# column names, so the two cannot drift apart.
properties = ['dissimilarity', 'correlation', 'homogeneity', 'contrast', 'ASM', 'energy']
distances = [1, 5]
angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]  # radians
angle_labels = [str(int(round(np.degrees(angle)))) for angle in angles]  # '0', '45', '90', '135'

for metadata in list_metadata:
    image_name = metadata['study_name']
    metadata_csv = metadata['metadata_csv']
    label = metadata_csv['pathology']

    # Locate the DICOM file below the image directory. Sorting makes the choice
    # deterministic when several files match, and an empty result is reported
    # explicitly instead of surfacing as an IndexError.
    directory = Path(metadata_csv['original_image_path'])
    dicom_paths = sorted(directory.rglob("*.dcm*"))
    if not dicom_paths:
        raise FileNotFoundError(f"No DICOM file found under {directory}")
    path_dicom_file = dicom_paths[0]
    dicom_file = dcmread(path_dicom_file)

    # Load the original image matrix.
    image = dicom_file.pixel_array

    # Reduce to 8 bits so that the values fit the 256 GLCM levels.
    # NOTE(review): dividing by 256 assumes 16-bit pixel data - TODO confirm.
    image_8bits = (image / 256).astype(np.uint8)

    glcm = graycomatrix(image_8bits, distances=distances, angles=angles, levels=256, symmetric=True, normed=True)

    # graycoprops returns one row per distance and one column per angle;
    # ravel() flattens it distance-major, the same order used for `columns`.
    feature = [value for name in properties for value in graycoprops(glcm, name).ravel()]

    # Append only after the image was processed, so the three lists stay aligned.
    image_names.append(image_name)
    labels.append(label)
    glcm_features.append(feature)

# Column names follow the flattening order: property -> distance -> angle.
columns = [
    f"{name}_ang_{angle_label}_dist_{distance}"
    for name in properties
    for distance in distances
    for angle_label in angle_labels
]

assert all(len(row) == len(columns) for row in glcm_features), \
    "feature vector length does not match the number of column names"

In [3]:
# Assemble the feature table (one row per entry of `glcm_features`) and add the
# pathology label as the last column.
output_csv = "../../outputs/mamografia/matriz_features_glcm.csv"
glcm_df = pd.DataFrame(data=glcm_features, columns=columns).assign(pathology=labels)

# Disabled step, kept for reference: merge the 'BENIGN_WITHOUT_CALLBACK' label
# into 'BENIGN' with
#   glcm_df['pathology'].replace('BENIGN_WITHOUT_CALLBACK', 'BENIGN')

# Write the table without the index column.
glcm_df.to_csv(output_csv, index=False)

glcm_df.head()

Unnamed: 0,dissimilarity_ang_0_dist_1,dissimilarity_ang_45_dist_1,dissimilarity_ang_90_dist_1,dissimilarity_ang_135_dist_1,dissimilarity_ang_0_dist_5,dissimilarity_ang_45_dist_5,dissimilarity_ang_90_dist_5,dissimilarity_ang_135_dist_5,correlation_ang_0_dist_1,correlation_ang_45_dist_1,...,ASM_ang_135_dist_5,energy_ang_0_dist_1,energy_ang_45_dist_1,energy_ang_90_dist_1,energy_ang_135_dist_1,energy_ang_0_dist_5,energy_ang_45_dist_5,energy_ang_90_dist_5,energy_ang_135_dist_5,pathology
0,0.796989,0.936951,0.853886,0.960574,1.221642,1.273973,1.215032,1.254506,0.99869,0.998135,...,0.474284,0.689123,0.689054,0.689091,0.689045,0.688969,0.688673,0.688494,0.688683,BENIGN
1,1.231098,1.446281,1.325998,1.504659,1.892971,1.935957,1.928993,2.03393,0.998911,0.998513,...,0.265534,0.516184,0.515934,0.516062,0.515914,0.515431,0.515333,0.515394,0.515299,BENIGN
2,0.786669,0.931541,0.833982,0.958347,1.226459,1.345679,1.3413,1.38896,0.998686,0.998076,...,0.481677,0.694554,0.694418,0.694455,0.694378,0.694268,0.69415,0.693838,0.69403,BENIGN_WITHOUT_CALLBACK
3,0.786669,0.931541,0.833982,0.958347,1.226459,1.345679,1.3413,1.38896,0.998686,0.998076,...,0.481677,0.694554,0.694418,0.694455,0.694378,0.694268,0.69415,0.693838,0.69403,BENIGN_WITHOUT_CALLBACK
4,1.147424,1.354507,1.186222,1.370834,1.769945,1.864975,1.730289,1.803568,0.998917,0.998541,...,0.30133,0.549273,0.549286,0.549392,0.549245,0.548992,0.548983,0.548912,0.548935,BENIGN_WITHOUT_CALLBACK
