In [1]:
import os
import sys

import pandas as pd
import numpy as np

sys.path.insert(0, "\\".join(os.path.abspath(os.curdir).split("\\")[:-2:]))

from src.utils.utils import *

# Extrair caracteristicas matriz GLCM

In [3]:
path_json = "D:\\mathe\\Documents\\PythonNotebooks\\Radiomica\\outputs\\mamografia\\extracted_metadata"

angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
properties = ['dissimilarity', 'correlation', 'homogeneity', 'contrast', 'ASM', 'energy']
distances = [1, 3]

### CMMD

In [3]:
name_json = "extracted_metadata_CMMD"

list_metadata = load_json(name_json, path_json)
glcm_features = []
labels = []
image_paths = []

for metadata in list_metadata:
    metadata_csv = metadata['metadata_csv']
    
    tmp_path = "/".join(metadata_csv['image_path'][0].split("/")[:-1])  # noqa: E501
    directory = Path(tmp_path)
    paths_dicom_file = list(directory.rglob("*.dcm*"))
    
    for path_dicom_file in paths_dicom_file:
        dicom_file = dcmread(path_dicom_file)
        image_paths.append(path_dicom_file)
        
        labels.append(metadata_csv['classification'])
        
        # Carrega a matriz da imagem original
        image = dicom_file.pixel_array

        # Redimensiona a imagem original e salva no array
        if image.max() > 256:
            image = cv2.convertScaleAbs(image)

        # image = cv2.equalizeHist(image)
        features = get_glcm_features(image, distances, angles, 256, True, True, properties)
        glcm_features.append(features)

columns = []
angles_labels = get_angles_labels(angles)

for name in properties:
    for distance in distances:
        for ang in angles_labels:
            columns.append(name + "_ang_" + ang + "_dist_" + str(distance))

glcm_df_CMMD = pd.DataFrame(glcm_features, columns=columns)
glcm_df_CMMD['pathology'] = labels
glcm_df_CMMD['image_path'] = image_paths
glcm_df_CMMD.to_csv("../../outputs/mamografia/matriz_glcm_features/matriz_features_glcm_CMMD.csv", index=False)
glcm_df_CMMD.head()

Unnamed: 0,dissimilarity_ang_0_dist_1,dissimilarity_ang_45_dist_1,dissimilarity_ang_90_dist_1,dissimilarity_ang_135_dist_1,dissimilarity_ang_0_dist_3,dissimilarity_ang_45_dist_3,dissimilarity_ang_90_dist_3,dissimilarity_ang_135_dist_3,correlation_ang_0_dist_1,correlation_ang_45_dist_1,...,energy_ang_0_dist_1,energy_ang_45_dist_1,energy_ang_90_dist_1,energy_ang_135_dist_1,energy_ang_0_dist_3,energy_ang_45_dist_3,energy_ang_90_dist_3,energy_ang_135_dist_3,pathology,image_path
0,0.625476,0.707031,0.627003,0.705654,0.849761,0.852868,0.843262,0.836354,0.991534,0.989223,...,0.917298,0.917229,0.917193,0.917241,0.917209,0.917115,0.916896,0.917144,Benign,D:\mathe\Documents\BancoDados_IC\Mamografia\CM...
1,1.184211,1.307271,1.150881,1.317487,1.571586,1.544999,1.489975,1.533504,0.989185,0.986792,...,0.813474,0.813478,0.813445,0.813389,0.813267,0.81338,0.813187,0.8132,Benign,D:\mathe\Documents\BancoDados_IC\Mamografia\CM...
2,1.14027,1.30222,1.141987,1.289631,1.560706,1.58245,1.577239,1.528206,0.991724,0.989122,...,0.878428,0.878354,0.878307,0.87834,0.8783,0.878218,0.877938,0.87819,Benign,D:\mathe\Documents\BancoDados_IC\Mamografia\CM...
3,1.682375,1.903251,1.64869,1.878657,2.259558,2.274373,2.170288,2.173169,0.987955,0.984806,...,0.772685,0.772592,0.772693,0.772697,0.772412,0.772365,0.772438,0.772575,Benign,D:\mathe\Documents\BancoDados_IC\Mamografia\CM...
4,1.65904,1.898933,1.665683,1.884636,2.188467,2.233676,2.251008,2.179951,0.98604,0.981888,...,0.781069,0.780941,0.78092,0.780936,0.780836,0.780698,0.780388,0.780689,Benign,D:\mathe\Documents\BancoDados_IC\Mamografia\CM...


### CBIS-DDSM 

In [4]:
name_json = "extracted_metadata_CBIS-DDSM"

list_metadata = load_json(name_json, path_json)
glcm_features = []
labels = []
image_paths = []

for metadata in list_metadata:
    metadata_csv = metadata['metadata_csv']
    
    directory = Path(metadata_csv['cropped_image_path'])
    paths_dicom_file = list(directory.rglob("*.dcm*"))
    
    for path_dicom_file in paths_dicom_file:
        dicom_file = dcmread(path_dicom_file)
        image_paths.append(path_dicom_file)
        
        labels.append(metadata_csv['pathology'])
        
        # Carrega a matriz da imagem original
        image = dicom_file.pixel_array

        # Redimensiona a imagem original e salva no array
        if image.max() > 256:
            image = cv2.convertScaleAbs(image)

        # image = cv2.equalizeHist(image)
        features = get_glcm_features(image, distances, angles, 256, True, True, properties)
        glcm_features.append(features)

columns = []
angles_labels = get_angles_labels(angles)

for name in properties:
    for distance in distances:
        for ang in angles_labels:
            columns.append(name + "_ang_" + ang + "_dist_" + str(distance))

glcm_df_CBIS_DDSM = pd.DataFrame(glcm_features, columns=columns)
glcm_df_CBIS_DDSM['pathology'] = labels
glcm_df_CBIS_DDSM['image_path'] = image_paths
glcm_df_CBIS_DDSM.to_csv("../../outputs/mamografia/matriz_glcm_features/matriz_features_glcm_CBIS-DDSM.csv", index=False)
glcm_df_CBIS_DDSM.head()

Unnamed: 0,dissimilarity_ang_0_dist_1,dissimilarity_ang_45_dist_1,dissimilarity_ang_90_dist_1,dissimilarity_ang_135_dist_1,dissimilarity_ang_0_dist_3,dissimilarity_ang_45_dist_3,dissimilarity_ang_90_dist_3,dissimilarity_ang_135_dist_3,correlation_ang_0_dist_1,correlation_ang_45_dist_1,...,energy_ang_0_dist_1,energy_ang_45_dist_1,energy_ang_90_dist_1,energy_ang_135_dist_1,energy_ang_0_dist_3,energy_ang_45_dist_3,energy_ang_90_dist_3,energy_ang_135_dist_3,pathology,image_path
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,BENIGN,D:\mathe\Documents\BancoDados_IC\Mamografia\CB...
1,0.007659,0.013195,0.008244,0.011729,0.022992,0.026295,0.024743,0.023471,0.99451,0.990544,...,0.997246,0.997234,0.997245,0.997237,0.997214,0.997207,0.997211,0.997212,BENIGN,D:\mathe\Documents\BancoDados_IC\Mamografia\CB...
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,BENIGN,D:\mathe\Documents\BancoDados_IC\Mamografia\CB...
3,0.007924,0.012117,0.007359,0.012223,0.023787,0.024177,0.022088,0.024071,0.993923,0.990709,...,0.997424,0.997416,0.997426,0.997415,0.997392,0.997391,0.997396,0.997391,BENIGN,D:\mathe\Documents\BancoDados_IC\Mamografia\CB...
4,0.001892,0.000948,0.000947,0.000948,0.000949,0.000952,0.000952,0.000952,-4e-06,-2e-06,...,0.999993,0.999996,0.999996,0.999996,0.999996,0.999996,0.999996,0.999996,BENIGN_WITHOUT_CALLBACK,D:\mathe\Documents\BancoDados_IC\Mamografia\CB...


### INBREAST

In [7]:
name_json = "extracted_metadata_INBREAST"

list_metadata = load_json(name_json, path_json)
glcm_features = []
labels = []
image_paths = []

for metadata in list_metadata:
    metadata_csv = metadata['metadata_csv']
    
    paths_dicom_file = [metadata_csv['cropped_image_path']]
    
    for path_dicom_file in paths_dicom_file:
        #dicom_file = dcmread(path_dicom_file)
            
        
        label = metadata_csv['bi-rads'].replace("4a", "3").replace("4b", "3").replace("4c", "5")  # noqa: E501
        label = int(label)
        
        if label == 0:
            label = 'INCONCLUSIVE'
        elif label == 1:
            label = 'NORMAL'
        elif label < 4:
            label = 'BENIGN'    
        else:
            label = 'MALIGNANT'
        
        # Carrega a matriz da imagem original
        # image = dicom_file.pixel_array
        
        if label not in ['BENIGN', 'MALIGNANT']:   
            continue      
         
        image = load_inbreast_mask(path_dicom_file)
        
        image = image.astype(np.uint8)

        image_paths.append(path_dicom_file)
        labels.append(label)
        
        # Redimensiona a imagem original e salva no array
        if image.max() > 256:
            image = cv2.convertScaleAbs(image)

        #image = cv2.equalizeHist(image)
        features = get_glcm_features(image, distances, angles, 256, True, True, properties)
        glcm_features.append(features)

columns = []
angles_labels = get_angles_labels(angles)

for name in properties:
    for distance in distances:
        for ang in angles_labels:
            columns.append(name + "_ang_" + ang + "_dist_" + str(distance))

glcm_df_INBREAST = pd.DataFrame(glcm_features, columns=columns)
glcm_df_INBREAST['pathology'] = labels
glcm_df_INBREAST['image_path'] = image_paths
glcm_df_INBREAST.to_csv("../../outputs/mamografia/matriz_glcm_features/matriz_features_glcm_INBREAST.csv", index=False)
glcm_df_INBREAST.head()

Unnamed: 0,dissimilarity_ang_0_dist_1,dissimilarity_ang_45_dist_1,dissimilarity_ang_90_dist_1,dissimilarity_ang_135_dist_1,dissimilarity_ang_0_dist_3,dissimilarity_ang_45_dist_3,dissimilarity_ang_90_dist_3,dissimilarity_ang_135_dist_3,correlation_ang_0_dist_1,correlation_ang_45_dist_1,...,energy_ang_0_dist_1,energy_ang_45_dist_1,energy_ang_90_dist_1,energy_ang_135_dist_1,energy_ang_0_dist_3,energy_ang_45_dist_3,energy_ang_90_dist_3,energy_ang_135_dist_3,pathology,image_path
0,4.9e-05,6.6e-05,4.6e-05,6.9e-05,0.000147,0.000133,0.000138,0.000138,0.99523,0.99352,...,0.99484,0.99483,0.994842,0.994829,0.994788,0.994794,0.994793,0.994791,BENIGN,D:/mathe/Documents/BancoDados_IC/Mamografia/IN...
1,3.1e-05,5.5e-05,4.6e-05,5.5e-05,9.2e-05,0.00011,0.000137,0.00011,0.996506,0.993771,...,0.995562,0.995549,0.995551,0.995549,0.995535,0.995522,0.995503,0.995522,BENIGN,D:/mathe/Documents/BancoDados_IC/Mamografia/IN...
2,8.5e-05,0.000122,8.9e-05,0.000127,0.000252,0.000242,0.000263,0.000251,0.994538,0.992134,...,0.992152,0.992131,0.99215,0.992129,0.992063,0.992066,0.992059,0.992062,MALIGNANT,D:/mathe/Documents/BancoDados_IC/Mamografia/IN...
3,6e-06,8e-06,7e-06,9e-06,1.8e-05,1.6e-05,1.9e-05,1.8e-05,0.950234,0.93483,...,0.999935,0.999934,0.999935,0.999933,0.999929,0.99993,0.999928,0.999929,BENIGN,D:/mathe/Documents/BancoDados_IC/Mamografia/IN...
4,9.6e-05,0.000151,0.000106,0.000135,0.000285,0.000301,0.000317,0.000269,0.994432,0.991198,...,0.991336,0.991306,0.991331,0.991314,0.991235,0.991226,0.991221,0.991242,MALIGNANT,D:/mathe/Documents/BancoDados_IC/Mamografia/IN...


### MIAS

In [6]:
name_json = "extracted_metadata_MIAS"

list_metadata = load_json(name_json, path_json)
glcm_features = []
labels = []
image_paths = []

for metadata in list_metadata:
    metadata_txt = metadata['metadata_txt']
    
    if 'classification' not in metadata_txt.keys() or 'cropped_image_path' not in metadata_txt.keys():
        continue
    
    image_path = metadata_txt['cropped_image_path']
    image_paths.append(image_path)
    labels.append(metadata_txt['classification'])
    
    # Carrega a matriz da imagem original
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # Redimensiona a imagem original e salva no array
    if image.max() > 256:
        image = cv2.convertScaleAbs(image)

    features = get_glcm_features(image, distances, angles, 256, True, True, properties)
    glcm_features.append(features)

columns = []
angles_labels = get_angles_labels(angles)

for name in properties:
    for distance in distances:
        for ang in angles_labels:
            columns.append(name + "_ang_" + ang + "_dist_" + str(distance))

glcm_df_MIAS = pd.DataFrame(glcm_features, columns=columns)
glcm_df_MIAS['pathology'] = labels
glcm_df_MIAS['image_path'] = image_paths
glcm_df_MIAS.to_csv("../../outputs/mamografia/matriz_glcm_features/matriz_features_glcm_MIAS.csv", index=False)
glcm_df_MIAS.head()

Unnamed: 0,dissimilarity_ang_0_dist_1,dissimilarity_ang_45_dist_1,dissimilarity_ang_90_dist_1,dissimilarity_ang_135_dist_1,dissimilarity_ang_0_dist_3,dissimilarity_ang_45_dist_3,dissimilarity_ang_90_dist_3,dissimilarity_ang_135_dist_3,correlation_ang_0_dist_1,correlation_ang_45_dist_1,...,energy_ang_0_dist_1,energy_ang_45_dist_1,energy_ang_90_dist_1,energy_ang_135_dist_1,energy_ang_0_dist_3,energy_ang_45_dist_3,energy_ang_90_dist_3,energy_ang_135_dist_3,pathology,image_path
0,1.770133,2.28658,1.691272,1.960602,3.421132,3.492933,3.177814,2.834678,0.999292,0.998775,...,0.043258,0.041618,0.04479,0.042723,0.038038,0.038508,0.040929,0.040071,Benign,D:/mathe/Documents/BancoDados_IC/Mamografia/MI...
1,1.246007,1.331877,0.921506,1.352017,2.226141,1.817961,1.467794,1.889652,0.995981,0.994838,...,0.06249,0.062114,0.075834,0.061525,0.050647,0.055793,0.064539,0.054605,Benign,D:/mathe/Documents/BancoDados_IC/Mamografia/MI...
2,1.753107,2.108589,1.755367,2.075266,3.255263,2.887634,2.528947,2.8044,0.989984,0.985712,...,0.053241,0.048751,0.052407,0.049765,0.042331,0.043433,0.044936,0.044625,Benign,D:/mathe/Documents/BancoDados_IC/Mamografia/MI...
3,1.753107,2.108589,1.755367,2.075266,3.255263,2.887634,2.528947,2.8044,0.989984,0.985712,...,0.053241,0.048751,0.052407,0.049765,0.042331,0.043433,0.044936,0.044625,Benign,D:/mathe/Documents/BancoDados_IC/Mamografia/MI...
4,2.299767,2.577988,1.912354,2.634793,4.54305,3.711182,3.366763,4.018066,0.986102,0.982539,...,0.042799,0.040907,0.046075,0.040068,0.032598,0.035548,0.036518,0.034015,Benign,D:/mathe/Documents/BancoDados_IC/Mamografia/MI...
