In [1]:
import os
import sys

import cv2
import numpy as np
import pandas as pd
from pydicom import dcmread
from skimage.exposure import equalize_adapthist
from skimage.feature import graycomatrix, graycoprops

# Put the directory two levels above the current working directory on the
# import path so the `src` package imported below can be found (presumably the
# repository root -- confirm). os.path.dirname is used instead of splitting on
# a literal backslash so this also works on non-Windows systems.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(os.curdir))))
from src.utils.utils import load_inbreast_mask

## Extrair características da matriz GLCM

### 1. Configurações

In [2]:
# Pixel-pair offsets (in pixels) at which the co-occurrence matrix is computed.
distances = [1]

# Directions of the pixel-pair offset, in radians, and the degree labels that
# stand in for them when naming feature columns.
angles = [
    0,
    np.pi / 4,
    np.pi / 2,
    3 * np.pi / 4,
]
angles_labels = [
    '0',
    '45',
    '90',
    '135',
]

# Texture descriptors requested from graycoprops for every GLCM.
properties = [
    'dissimilarity',
    'correlation',
    'homogeneity',
    'contrast',
    'ASM',
    'energy',
]
# first_order_features_labels = ['mean', 'std', 'variance', 'skewness', 'kurtosis']

### 2. Databases

##### a. CMMD

In [3]:
# Extract GLCM texture features for every image listed in the CMMD metadata
# CSV and write them, together with the pathology label, to a features CSV.
df = pd.read_csv('../../outputs/mamografia/cmmd/metadata_cmmd.csv')

# One (initially empty) column per property / distance / angle combination.
glcm_features = {f'{prop_name}_dist_{distance}_ang_{angle}': []
                 for prop_name in properties
                 for distance in distances
                 for angle in angles_labels}

for _, row in df.iterrows():
    image = dcmread(row['image_path']).pixel_array

    # Rescale to 0-255 uint8. An all-zero image is mapped to zeros explicitly
    # instead of dividing by zero.
    peak = image.max()
    if peak == 0:
        image = np.zeros(image.shape, dtype='uint8')
    else:
        image = ((image / peak) * 255).astype('uint8')

    # Contrast-limited adaptive histogram equalisation, scaled back to uint8.
    clahe_image = equalize_adapthist(image, clip_limit=0.03)
    clahe_image = (clahe_image * 255).astype('uint8')

    # Binarise the equalised image with Otsu's automatically chosen threshold.
    clahe_binary_image = cv2.threshold(clahe_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # Compute the GLCM of the binarised image.
    matrix_glcm = graycomatrix(clahe_binary_image, distances=distances, angles=angles,
                               levels=256, symmetric=True, normed=True)

    # graycoprops returns one row per distance and one column per angle.
    for prop_name in properties:
        glcm_prop = graycoprops(matrix_glcm, prop_name)

        for distance, prop_distance in zip(distances, glcm_prop):
            for angle, prop in zip(angles_labels, prop_distance):
                glcm_features[f'{prop_name}_dist_{distance}_ang_{angle}'].append(prop)

glcm_features_df = pd.DataFrame(glcm_features)
# No row is skipped above, so features and labels line up by position/index.
glcm_features_df['pathology'] = df['pathology']
glcm_features_df.to_csv('../../outputs/mamografia/cmmd/glcm_features_cmmd.csv', index=False)

##### b. CBIS-DDSM 

In [None]:
# Extract GLCM texture features for every image listed in the CBIS-DDSM
# calcification and mass metadata CSVs and write them, together with the
# pathology label, to a features CSV.
df = pd.read_csv('../../outputs/mamografia/cbis-ddsm/metadata_calc_case_cbis-ddsm.csv')
df = pd.concat([df, pd.read_csv('../../outputs/mamografia/cbis-ddsm/metadata_mass_case_cbis-ddsm.csv')],
               axis=0).reset_index(drop=True)

# One (initially empty) column per property / distance / angle combination.
# Columns are named with the degree labels (not the raw radian values) so the
# output schema matches the feature files produced for the other datasets.
glcm_features = {f'{prop_name}_dist_{distance}_ang_{angle}': []
                 for prop_name in properties
                 for distance in distances
                 for angle in angles_labels}

for _, row in df.iterrows():
    image = dcmread(row['image_path']).pixel_array

    # Rescale to 0-255 uint8. An all-zero image is mapped to zeros explicitly
    # instead of dividing by zero.
    peak = image.max()
    if peak == 0:
        image = np.zeros(image.shape, dtype='uint8')
    else:
        image = ((image / peak) * 255).astype('uint8')

    # Compute the GLCM of the rescaled image.
    matrix_glcm = graycomatrix(image, distances=distances, angles=angles,
                               levels=256, symmetric=True, normed=True)

    # graycoprops returns one row per distance and one column per angle.
    for prop_name in properties:
        glcm_prop = graycoprops(matrix_glcm, prop_name)

        for distance, prop_distance in zip(distances, glcm_prop):
            for angle, prop in zip(angles_labels, prop_distance):
                glcm_features[f'{prop_name}_dist_{distance}_ang_{angle}'].append(prop)

glcm_features_df = pd.DataFrame(glcm_features)
# The index was reset after the concat and no row is skipped, so labels align.
glcm_features_df['pathology'] = df['pathology']
glcm_features_df.to_csv('../../outputs/mamografia/cbis-ddsm/glcm_features_cbis-ddsm.csv', index=False)

##### c. INBREAST

In [3]:
# Extract GLCM texture features for every INBREAST entry that has a path in the
# selected column and write them, together with a pathology label derived from
# the BI-RADS category, to a features CSV.
df = pd.read_csv('../../outputs/mamografia/inbreast/metadata_inbreast.csv')

# Column holding the image (or XML mask) path. Defined before the loop so the
# output-file suffix exists even when no row is processed.
image_path_column_name = 'roi_image_path'
isRoi = '_roi' if 'roi' in image_path_column_name else ''

# One (initially empty) column per property / distance / angle combination.
glcm_features = {f'{prop_name}_dist_{distance}_ang_{angle}': []
                 for prop_name in properties
                 for distance in distances
                 for angle in angles_labels}
labels = []

for _, row in df.iterrows():
    image_path = str(row[image_path_column_name])

    # Rows without a path (NaN in the CSV) are skipped entirely.
    if image_path == 'nan':
        continue

    # Map the BI-RADS category to a pathology label. Sub-categories 4a/4b are
    # folded into 3 and 4c into 5 before the numeric comparison. str() keeps
    # this working if pandas parsed the column as integers.
    label = str(row['bi-rads']).replace("4a", "3").replace("4b", "3").replace("4c", "5")  # noqa: E501
    label = int(label)

    if label == 0:
        label = 'inconclusive'
    elif label == 1:
        label = 'normal'
    elif label < 4:
        label = 'benign'
    else:
        label = 'malignant'

    labels.append(label)

    # XML paths are loaded through the project mask loader; anything else is
    # read as a DICOM file.
    image = load_inbreast_mask(image_path) if 'xml' in image_path \
            else dcmread(image_path).pixel_array

    # Rescale to 0-255 uint8. An all-zero image is mapped to zeros explicitly
    # instead of dividing by zero.
    peak = image.max()
    if peak == 0:
        image = np.zeros(image.shape, dtype='uint8')
    else:
        image = ((image / peak) * 255).astype('uint8')

    # Compute the GLCM of the rescaled image.
    matrix_glcm = graycomatrix(image, distances=distances, angles=angles,
                               levels=256, symmetric=True, normed=True)

    # graycoprops returns one row per distance and one column per angle.
    for prop_name in properties:
        glcm_prop = graycoprops(matrix_glcm, prop_name)

        for distance, prop_distance in zip(distances, glcm_prop):
            for angle, prop in zip(angles_labels, prop_distance):
                glcm_features[f'{prop_name}_dist_{distance}_ang_{angle}'].append(prop)

glcm_features_df = pd.DataFrame(glcm_features)
# `labels` only has entries for rows that were not skipped, so it lines up
# with the feature rows one-to-one.
glcm_features_df['pathology'] = labels
glcm_features_df.to_csv(f'../../outputs/mamografia/inbreast/glcm_features{isRoi}_inbreast.csv', index=False)
glcm_features_df['pathology'].value_counts(dropna=False)

benign       264
malignant     79
Name: pathology, dtype: int64

##### d. MIAS

In [3]:
# Extract GLCM texture features for every MIAS entry that has a path in the
# selected column and write them, together with the pathology label, to a
# features CSV.
df = pd.read_csv('../../outputs/mamografia/mias/metadata_mias.csv')

# Column holding the image path. Defined before the loop so the output-file
# suffix exists even when no row is processed.
image_path_column_name = 'roi_image_path'
isRoi = '_roi' if 'roi' in image_path_column_name else ''

# One (initially empty) column per property / distance / angle combination.
glcm_features = {f'{prop_name}_dist_{distance}_ang_{angle}': []
                 for prop_name in properties
                 for distance in distances
                 for angle in angles_labels}

labels = []

for _, row in df.iterrows():
    image_path = str(row[image_path_column_name])

    # Rows without a path (NaN in the CSV) are skipped entirely.
    if image_path == 'nan':
        continue

    # cv2.imread signals an unreadable file by returning None rather than
    # raising, so fail here with the offending path instead of deeper in the
    # pipeline.
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')

    # Record the label only for images that are actually processed.
    labels.append(row['pathology'])

    # Contrast-limited adaptive histogram equalisation, scaled back to uint8.
    clahe_image = equalize_adapthist(image, clip_limit=0.03)
    clahe_image = (clahe_image * 255).astype('uint8')

    # Binarise the equalised image with Otsu's automatically chosen threshold.
    clahe_binary_image = cv2.threshold(clahe_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # Compute the GLCM of the binarised image.
    matrix_glcm = graycomatrix(clahe_binary_image, distances=distances, angles=angles,
                               levels=256, symmetric=True, normed=True)

    # graycoprops returns one row per distance and one column per angle.
    for prop_name in properties:
        glcm_prop = graycoprops(matrix_glcm, prop_name)

        for distance, prop_distance in zip(distances, glcm_prop):
            for angle, prop in zip(angles_labels, prop_distance):
                glcm_features[f'{prop_name}_dist_{distance}_ang_{angle}'].append(prop)

glcm_features_df = pd.DataFrame(glcm_features)
# `labels` only has entries for rows that were not skipped, so it lines up
# with the feature rows one-to-one.
glcm_features_df['pathology'] = labels
glcm_features_df.to_csv(f'../../outputs/mamografia/mias/glcm_features{isRoi}_mias.csv', index=False)
glcm_features_df['pathology'].value_counts(dropna=False)

benign       68
malignant    51
Name: pathology, dtype: int64