In [1]:
import os
import sys

import cv2
import numpy as np
import pandas as pd
from pydicom import dcmread
from skimage.exposure import equalize_adapthist
from skimage.feature import graycomatrix, graycoprops

# Make the directory two levels above the current working directory importable,
# so that the `src.utils.utils` import below resolves. The path is built with
# os.path instead of splitting on a literal backslash, so it also works on
# operating systems whose path separator is not "\".
sys.path.insert(0, os.path.abspath(os.path.join(os.curdir, os.pardir, os.pardir)))
from src.utils.utils import load_inbreast_mask, get_fo_features, extract_roi

## Extrair características

### 1. Configurações

In [2]:
# GLCM offsets: a single pixel distance evaluated along four directions.
distances = [1]
angles = [
    0,              # 0 degrees
    np.pi/4,        # 45 degrees
    np.pi/2,        # 90 degrees
    3*np.pi/4,      # 135 degrees
]
# Degree labels used inside feature column names; same order as `angles`.
angles_labels = [str(degrees) for degrees in (0, 45, 90, 135)]

# Property names handed to graycoprops, one group of columns per property.
glcm_properties = [
    'dissimilarity', 'correlation', 'homogeneity',
    'contrast', 'energy', 'ASM',
]

# Names of the first-order statistics; used as keys/columns of the
# first-order feature tables.
fo_properties = [
    'mean', 'std', 'smoothness',
    'third_moment', 'uniformity', 'entropy',
]

### 2. Databases

##### a. CMMD

In [None]:
# CMMD: for every image listed in the metadata table, compute first-order
# statistics and GLCM texture properties, then write one CSV per feature family.
df = pd.read_csv('../../outputs/mamografia/cmmd/metadata_cmmd.csv')

# Column-wise accumulators: column name -> list with one value per image.
fo_features = {f'{stat_name}': [] for stat_name in fo_properties}
glcm_features = {
    f'{prop_name}_dist_{dist}_ang_{ang}': []
    for prop_name in glcm_properties
    for dist in distances
    for ang in angles_labels
}

for image_path in df['image_path']:
    # Read the DICOM pixels and rescale them by their maximum into 8 bits.
    pixels = dcmread(image_path).pixel_array
    image = ((pixels / pixels.max()) * 255).astype('uint8')

    # Adaptive histogram equalisation; the result is scaled by 255 and cast
    # back to uint8 before thresholding.
    clahe_image = (equalize_adapthist(image, clip_limit=0.03) * 255).astype('uint8')

    # Otsu binarisation with maxval=255.
    # NOTE(review): every feature below is computed on this binarised image,
    # not on the gray-level CLAHE image — confirm this is intended.
    clahe_binary_image = cv2.threshold(
        clahe_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )[1]

    # First-order statistics: append each returned value to its own column.
    for stat_name, stat_value in get_fo_features(clahe_binary_image).items():
        fo_features[stat_name].append(stat_value)

    # Gray-level co-occurrence matrix for all configured distances and angles.
    matrix_glcm = graycomatrix(
        clahe_binary_image,
        distances=distances,
        angles=angles,
        levels=256,
        symmetric=True,
        normed=True,
    )

    # graycoprops yields one value per (distance, angle) pair; store each one
    # under its own column.
    for prop_name in glcm_properties:
        prop_table = graycoprops(matrix_glcm, prop_name)

        for dist, per_distance in zip(distances, prop_table):
            for ang, value in zip(angles_labels, per_distance):
                glcm_features[f'{prop_name}_dist_{dist}_ang_{ang}'].append(value)

# Attach the pathology column from the metadata table and save both tables.
first_order_features_df = pd.DataFrame(fo_features).assign(pathology=df['pathology'])
first_order_features_df.to_csv('../../outputs/mamografia/cmmd/first_order_features_cmmd.csv', index=False)

glcm_features_df = pd.DataFrame(glcm_features).assign(pathology=df['pathology'])
glcm_features_df.to_csv('../../outputs/mamografia/cmmd/glcm_features_cmmd.csv', index=False)

##### b. CBIS-DDSM 

In [None]:
# CBIS-DDSM: concatenate the calcification and mass metadata tables, compute
# first-order statistics and GLCM texture properties for every image, and
# write one CSV per feature family.
df = pd.read_csv('../../outputs/mamografia/cbis-ddsm/metadata_calc_case_cbis-ddsm.csv')
df = pd.concat([df, pd.read_csv('../../outputs/mamografia/cbis-ddsm/metadata_mass_case_cbis-ddsm.csv')],
               axis=0).reset_index(drop=True)

# Which metadata column holds the image path. The output-file suffix depends
# only on this name, so both are set once here; this also keeps `isRoi`
# defined when the metadata table has no rows.
image_path_column_name = 'image_path'
isRoi = '_roi' if 'roi' in image_path_column_name else ''

# Column-wise accumulators: column name -> list with one value per image.
glcm_features = {f'{prop_name}_dist_{distance}_ang_{angle}': []
                 for prop_name in glcm_properties
                 for distance in distances
                 for angle in angles_labels}
fo_features = {f'{feature}': [] for feature in fo_properties}
labels = []

for raw_path, pathology in zip(df[image_path_column_name], df['pathology']):
    image_path = str(raw_path)
    labels.append(pathology)

    # Read the DICOM pixels and rescale them by their maximum into 8 bits.
    # No contrast enhancement or binarisation is applied for this dataset:
    # features are computed on the rescaled image itself.
    image = dcmread(image_path).pixel_array
    image = ((image / image.max()) * 255).astype('uint8')

    # First-order statistics: append each returned value to its own column.
    for feature_name, feature in get_fo_features(image).items():
        fo_features[feature_name].append(feature)

    # Gray-level co-occurrence matrix for all configured distances and angles.
    matrix_glcm = graycomatrix(image, distances=distances, angles=angles, levels=256, symmetric=True, normed=True)

    # graycoprops yields one value per (distance, angle) pair; store each one
    # under its own column. The loop variable is not called `property` so the
    # built-in of that name is not shadowed in the notebook namespace.
    for prop_name in glcm_properties:
        glcm_prop = graycoprops(matrix_glcm, prop_name)

        for distance, prop_distance in zip(distances, glcm_prop):
            for angle, prop in zip(angles_labels, prop_distance):
                glcm_features[f'{prop_name}_dist_{distance}_ang_{angle}'].append(prop)

# Both tables take their labels from the list collected in the loop, so the
# label column always matches the rows that were actually processed.
first_order_features_df = pd.DataFrame(fo_features)
first_order_features_df['pathology'] = labels
first_order_features_df.to_csv(f'../../outputs/mamografia/cbis-ddsm/first_order_features{isRoi}_cbis-ddsm.csv', index=False)

glcm_features_df = pd.DataFrame(glcm_features)
glcm_features_df['pathology'] = labels
glcm_features_df.to_csv(f'../../outputs/mamografia/cbis-ddsm/glcm_features{isRoi}_cbis-ddsm.csv', index=False)

##### c. INBREAST

In [None]:
# INBREAST: for every metadata row that has an ROI path, derive a pathology
# label from the BI-RADS score, compute first-order statistics and GLCM texture
# properties, and write one CSV per feature family.
df = pd.read_csv('../../outputs/mamografia/inbreast/metadata_inbreast.csv')

# Which metadata column holds the image path. The output-file suffix depends
# only on this name, so both are set once here; this also keeps `isRoi`
# defined when no row is processed.
image_path_column_name = 'roi_image_path'
isRoi = '_roi' if 'roi' in image_path_column_name else ''

# Column-wise accumulators: column name -> list with one value per image.
glcm_features = {f'{prop_name}_dist_{distance}_ang_{angle}': []
                 for prop_name in glcm_properties
                 for distance in distances
                 for angle in angles_labels}
# Keys come from `fo_properties`, the first-order name list defined in the
# configuration cell.
fo_features = {f'{feature}': [] for feature in fo_properties}
labels = []

for raw_path, raw_birads in zip(df[image_path_column_name], df['bi-rads']):
    image_path = str(raw_path)

    # Rows without a path (NaN in the CSV) are skipped entirely.
    if image_path == 'nan':
        continue

    # Collapse the BI-RADS 4 sub-categories onto integer scores
    # (4a/4b -> 3, 4c -> 5). str() keeps this working when the value is not
    # already a string.
    birads = str(raw_birads).replace("4a", "3").replace("4b", "3").replace("4c", "5")
    birads = int(birads)

    # Map the integer score to a pathology label.
    if birads == 0:
        label = 'inconclusive'
    elif birads == 1:
        label = 'normal'
    elif birads < 4:
        label = 'benign'
    else:
        label = 'malignant'

    labels.append(label)

    # Paths containing 'xml' are loaded through the project mask loader;
    # anything else is read as a DICOM file.
    image = load_inbreast_mask(image_path) if 'xml' in image_path \
            else dcmread(image_path).pixel_array

    # Rescale by the maximum into 8 bits.
    image = ((image / image.max()) * 255).astype('uint8')

    # First-order statistics: append each returned value to its own column.
    for feature_name, feature in get_fo_features(image).items():
        fo_features[feature_name].append(feature)

    # Gray-level co-occurrence matrix for all configured distances and angles.
    matrix_glcm = graycomatrix(image, distances=distances, angles=angles, levels=256, symmetric=True, normed=True)

    # graycoprops yields one value per (distance, angle) pair; store each one
    # under its own column. The loop variable is not called `property` so the
    # built-in of that name is not shadowed in the notebook namespace.
    for prop_name in glcm_properties:
        glcm_prop = graycoprops(matrix_glcm, prop_name)

        for distance, prop_distance in zip(distances, glcm_prop):
            for angle, prop in zip(angles_labels, prop_distance):
                glcm_features[f'{prop_name}_dist_{distance}_ang_{angle}'].append(prop)

# Labels come from the list built in the loop because skipped rows make the
# feature tables shorter than the metadata table.
first_order_features_df = pd.DataFrame(fo_features)
first_order_features_df['pathology'] = labels
first_order_features_df.to_csv(f'../../outputs/mamografia/inbreast/first_order_features{isRoi}_inbreast.csv', index=False)

glcm_features_df = pd.DataFrame(glcm_features)
glcm_features_df['pathology'] = labels
glcm_features_df.to_csv(f'../../outputs/mamografia/inbreast/glcm_features{isRoi}_inbreast.csv', index=False)

##### d. MIAS

In [3]:
# MIAS: for every metadata row that has both an image and an ROI mask path,
# apply CLAHE, extract the ROI, compute first-order statistics and GLCM texture
# properties, and write one CSV per feature family.
df = pd.read_csv('../../outputs/mamografia/mias/metadata_mias.csv')
glcm_features = {f'{prop_name}_dist_{distance}_ang_{angle}': []
                 for prop_name in glcm_properties
                 for distance in distances
                 for angle in angles_labels}

fo_features = []  # one list of first-order values per processed image
labels = []

# Rows/columns of the GLCM with an index below this gray level are discarded
# before the properties are computed.
# NOTE(review): presumably this drops co-occurrences that involve dark or
# background pixels (e.g. pixels outside the ROI) — confirm the intent and
# the choice of 100.
GLCM_FIRST_KEPT_LEVEL = 100

# The CLAHE operator has fixed parameters, so it is created once and reused.
clahe = cv2.createCLAHE(clipLimit=6.0, tileGridSize=(8, 8))

for raw_image_path, raw_roi_path, pathology in zip(df['image_path'], df['roi_image_path'], df['pathology']):
    image_path = str(raw_image_path)
    roi_image_path = str(raw_roi_path)

    # Rows missing either path (NaN in the CSV) are skipped entirely.
    if image_path == 'nan' or roi_image_path == 'nan':
        continue

    # Read the image and its ROI mask as single-channel arrays.
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    mask = cv2.imread(roi_image_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread returns None instead of raising when a file cannot be read;
    # fail here with the offending paths rather than deeper inside OpenCV.
    if image is None or mask is None:
        raise FileNotFoundError(
            f'Could not read image or mask: {image_path!r}, {roi_image_path!r}'
        )

    labels.append(pathology)

    # Contrast-limited adaptive histogram equalisation.
    clahe_image = clahe.apply(image)

    roi = extract_roi(clahe_image, mask)

    # First-order statistics, restricted by the mask; values are stored in the
    # order returned by the helper and later labelled with `fo_properties`.
    fo_features.append(list(get_fo_features(clahe_image, mask=mask, features=fo_properties).values()))

    # Gray-level co-occurrence matrix of the ROI for all distances and angles.
    matrix_glcm = graycomatrix(roi, distances=distances, angles=angles, levels=256, symmetric=True, normed=True)

    # Keep only gray levels >= GLCM_FIRST_KEPT_LEVEL on both level axes.
    matrix_glcm = matrix_glcm[GLCM_FIRST_KEPT_LEVEL:, GLCM_FIRST_KEPT_LEVEL:]

    # graycoprops yields one value per (distance, angle) pair; store each one
    # under its own column. The loop variable is not called `property` so the
    # built-in of that name is not shadowed in the notebook namespace.
    for prop_name in glcm_properties:
        glcm_prop = graycoprops(matrix_glcm, prop_name)

        for distance, prop_distance in zip(distances, glcm_prop):
            for angle, prop in zip(angles_labels, prop_distance):
                glcm_features[f'{prop_name}_dist_{distance}_ang_{angle}'].append(prop)

# Save the first-order features. Labels come from the list built in the loop
# because skipped rows make the feature tables shorter than the metadata table.
first_order_features_df = pd.DataFrame(fo_features, columns=fo_properties)
first_order_features_df['pathology'] = labels
first_order_features_df.to_csv('../../outputs/mamografia/mias/first_order_features_mias.csv', index=False)

# Save the GLCM features.
glcm_features_df = pd.DataFrame(glcm_features)
glcm_features_df['pathology'] = labels
glcm_features_df.to_csv('../../outputs/mamografia/mias/glcm_features_mias.csv', index=False)