In [1]:
import os
import sys

import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import NearMiss
from keras.layers import Dense, Dropout
from keras.models import Sequential
from sklearn import svm
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

sys.path.insert(0, "\\".join(os.path.abspath(os.curdir).split("\\")[:-2:]))

import matplotlib.pyplot as plt
import seaborn as sns

from src.utils.utils import *

# from keras.wrappers.scikit_learn import KerasClassifier
# from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Modelos preditivo

## Configuração

In [2]:
SELECTED_DATABASE = "MIAS"

## Importação e análise dos conjuntos de dados

Iniciamos importando o conjunto de dados e realizamos a visualização das 5 primeiras linhas para verificar a estrutura dos dados.

In [3]:
breast_cancer_df = None

match SELECTED_DATABASE:
    case "CMMD":
        breast_cancer_df = pd.read_csv("../../outputs/mamografia/matriz_glcm_features/matriz_features_glcm_CMMD.csv")
    case "CBIS-DDSM":
        breast_cancer_df = pd.read_csv("../../outputs/mamografia/matriz_glcm_features/matriz_features_glcm_CBIS-DDSM.csv")
    case "INBREAST":
        breast_cancer_df = pd.read_csv("../../outputs/mamografia/matriz_glcm_features/matriz_features_glcm_INBREAST.csv")
    case "MIAS":
        breast_cancer_df = pd.read_csv("../../outputs/mamografia/matriz_glcm_features/matriz_features_glcm_MIAS.csv")
    case _:
        raise Exception("Database not found!")
    
breast_cancer_df.head()

Unnamed: 0,dissimilarity_ang_0_dist_1,dissimilarity_ang_45_dist_1,dissimilarity_ang_90_dist_1,dissimilarity_ang_135_dist_1,dissimilarity_ang_0_dist_3,dissimilarity_ang_45_dist_3,dissimilarity_ang_90_dist_3,dissimilarity_ang_135_dist_3,correlation_ang_0_dist_1,correlation_ang_45_dist_1,...,energy_ang_0_dist_1,energy_ang_45_dist_1,energy_ang_90_dist_1,energy_ang_135_dist_1,energy_ang_0_dist_3,energy_ang_45_dist_3,energy_ang_90_dist_3,energy_ang_135_dist_3,pathology,image_path
0,0.74348,0.938587,0.638144,0.87922,1.493057,1.409452,1.165028,1.319702,0.998509,0.997434,...,0.57715,0.576423,0.579305,0.576354,0.574213,0.574944,0.577854,0.574892,Benign,D:/mathe/Documents/BancoDados_IC/Mamografia/MI...
1,1.126747,1.393149,0.968634,1.413172,2.277286,2.194972,1.922073,2.213473,0.997826,0.997085,...,0.488012,0.487575,0.492359,0.487658,0.484513,0.485373,0.48898,0.485563,Benign,D:/mathe/Documents/BancoDados_IC/Mamografia/MI...
2,1.119408,1.43199,1.016493,1.366212,2.148667,2.087481,1.669776,1.97351,0.997504,0.996087,...,0.410421,0.409889,0.411775,0.409951,0.40703,0.408066,0.41103,0.40827,Benign,D:/mathe/Documents/BancoDados_IC/Mamografia/MI...
3,1.119408,1.43199,1.016493,1.366212,2.148667,2.087481,1.669776,1.97351,0.997504,0.996087,...,0.410421,0.409889,0.411775,0.409951,0.40703,0.408066,0.41103,0.40827,Benign,D:/mathe/Documents/BancoDados_IC/Mamografia/MI...
4,0.831069,0.954908,0.66805,1.010285,1.671718,1.468367,1.242472,1.534555,0.997882,0.997707,...,0.586297,0.586001,0.588736,0.586033,0.583368,0.584126,0.586787,0.584278,Benign,D:/mathe/Documents/BancoDados_IC/Mamografia/MI...


Como último passo, verificamos a distribuição das classes. 

In [4]:
breast_cancer_df["pathology"].value_counts(normalize=True)

Benign       0.561983
Malignant    0.438017
Name: pathology, dtype: float64

## Pré-Processamento

In [5]:
# Elimina os dados com classe BENIGN_WITHOUT_CALLBACK ou NORMAL
if SELECTED_DATABASE == "CBIS-DDSM":
    breast_cancer_df = breast_cancer_df[breast_cancer_df["pathology"] != "BENIGN_WITHOUT_CALLBACK"]
elif SELECTED_DATABASE == "INBREAST":
    breast_cancer_df = breast_cancer_df[breast_cancer_df["pathology"] != "NORMAL"]

# Separa em features e labels
X, y = (breast_cancer_df.drop("pathology", axis=1), breast_cancer_df["pathology"])

# Balanceamento de classes
if SELECTED_DATABASE == "CMMD":
    nm = NearMiss(version=1)
    X, y = nm.fit_resample(X, y)
elif SELECTED_DATABASE == "INBREAST":
    smote = SMOTE()
    X, y = smote.fit_resample(X, y)

# Separa os dados em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

# Salva e retira image_path
image_paths_train = X_train["image_path"]
image_paths_test = X_test["image_path"]
X_train = X_train.drop("image_path", axis=1)
X_test = X_test.drop("image_path", axis=1)

 # Padroniza os dados
""" columns = X_train.columns
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=columns) """

# Tratamos os rótulos categóricos
if SELECTED_DATABASE == "CMMD" or "MIAS":
    y_train = y_train.map({"Benign": 0, "Malignant": 1})
    y_test = y_test.map({"Benign": 0, "Malignant": 1})
elif SELECTED_DATABASE == 'CBIS-DDSM' or 'INBREAST':
    y_train = y_train.map({"BENIGN": 0, "MALIGNANT": 1})
    y_test = y_test.map({"BENIGN": 0, "MALIGNANT": 1})

## Treinamento dos modelos

### Variáveis

In [6]:
angles = ["0", "45", "90", "135"]
distances = ['1', '3']
threshold = 0.5 # Definir o limiar
kf = KFold(n_splits=5)

### Funções

In [7]:
# Função para criar o modelo do Keras
def create_mlp_model(shape):
    model = Sequential()
    model.add(Dense(50, input_dim=shape, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(50, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])
    return model


def evaluate_mlp_model(model, data, target, n_splits, n_epochs, batch_size, threshold=0.5):
    kf = KFold(n_splits = n_splits)
    
    acc = []
    for train_index, test_index in kf.split(data, target): 
        model.fit(data.iloc[train_index], target.iloc[train_index], 
                  epochs=n_epochs, batch_size=batch_size)
        
        y_pred = model.predict(data.iloc[test_index])
        
        # Definir o limiar
        threshold = threshold

        # Transformar as saídas em rótulos
        y_pred = (y_pred > threshold).astype(int)
        
        acc.append(accuracy_score(y_pred, target.iloc[test_index]))

    return (np.array(acc)).mean() * 100

### KNeighborsClassifier

In [8]:
# Treina com todos os angulos e distancias
knn_model = KNeighborsClassifier(n_neighbors=5, weights="distance", metric="euclidean", algorithm="auto")

knn_model.fit(X_train, y_train)
predict = knn_model.predict(X_test)
acurracy = accuracy_score(y_test, predict) * 100
#acurracy = (cross_val_score(knn_model, X_test, y_test, cv=kf).mean()) * 100

# predicts = {"KNeighborsClassifier": {"all": predict}}
scores = {"KNeighborsClassifier": {"all": {"accuracy_score": acurracy}}}

""" for distance in distances:
    for angle in angles:
        columns = [column for column in X_test.columns if f"ang_{angle}_dist_{distance}" in column]
        X_train_filtered = X_train.loc[::, columns]
        X_test_filtered = X_test.loc[::, columns]

        knn_model = KNeighborsClassifier(n_neighbors=5, weights="distance", metric="euclidean", algorithm="auto")
        # knn_model.fit(X_train_filtered, y_train)
        # predict = knn_model.predict(X_test_filtered)
        # acurracy = accuracy_score(y_test, predict) * 100
        acurracy = (cross_val_score(knn_model, X_test_filtered, y_test, cv=kf).mean()) * 100

        #predicts["KNeighborsClassifier"][f"ang_{angle}_dist_{distance}"] = predict
        scores["KNeighborsClassifier"][f"ang_{angle}_dist_{distance}"] = {"accuracy_score": acurracy} """

' for distance in distances:\n    for angle in angles:\n        columns = [column for column in X_test.columns if f"ang_{angle}_dist_{distance}" in column]\n        X_train_filtered = X_train.loc[::, columns]\n        X_test_filtered = X_test.loc[::, columns]\n\n        knn_model = KNeighborsClassifier(n_neighbors=5, weights="distance", metric="euclidean", algorithm="auto")\n        # knn_model.fit(X_train_filtered, y_train)\n        # predict = knn_model.predict(X_test_filtered)\n        # acurracy = accuracy_score(y_test, predict) * 100\n        acurracy = (cross_val_score(knn_model, X_test_filtered, y_test, cv=kf).mean()) * 100\n\n        #predicts["KNeighborsClassifier"][f"ang_{angle}_dist_{distance}"] = predict\n        scores["KNeighborsClassifier"][f"ang_{angle}_dist_{distance}"] = {"accuracy_score": acurracy} '

### Random Forest Classifier

In [9]:
# Treina com todos os angulos e distancias
random_forest_model = RandomForestClassifier(n_estimators=200, max_depth=10, criterion="gini", 
                                             min_samples_leaf=3, min_samples_split=2)
random_forest_model.fit(X_train, y_train)
predict = random_forest_model.predict(X_test)
acurracy = accuracy_score(y_test, predict) * 100
#acurracy = (cross_val_score(random_forest_model, X_test, y_test, cv=kf).mean()) * 100

# predicts["RandomForestClassifier"] = {"all": predict}
scores["RandomForestClassifier"] = {"all": {"accuracy_score": acurracy}}

""" for distance in distances:
    for angle in angles:
        columns = [column for column in X_test.columns if f"ang_{angle}_dist_{distance}" in column]
        X_train_filtered = X_train.loc[::, columns]
        X_test_filtered = X_test.loc[::, columns]
        
        random_forest_model = RandomForestClassifier(n_estimators=200, max_depth=10, criterion="gini", 
                                                     min_samples_leaf=3, min_samples_split=2)
        
        # random_forest_model.fit(X_train_filtered, y_train)
        # predict = random_forest_model.predict(X_test_filtered)
        # acurracy = accuracy_score(y_test, predict) * 100
        acurracy = (cross_val_score(random_forest_model, X_test_filtered, y_test, cv=kf).mean()) * 100

        # predicts["RandomForestClassifier"][f"ang_{angle}_dist_{distance}"] = predict
        scores["RandomForestClassifier"][f"ang_{angle}_dist_{distance}"] = {"accuracy_score": acurracy} """

' for distance in distances:\n    for angle in angles:\n        columns = [column for column in X_test.columns if f"ang_{angle}_dist_{distance}" in column]\n        X_train_filtered = X_train.loc[::, columns]\n        X_test_filtered = X_test.loc[::, columns]\n        \n        random_forest_model = RandomForestClassifier(n_estimators=200, max_depth=10, criterion="gini", \n                                                     min_samples_leaf=3, min_samples_split=2)\n        \n        # random_forest_model.fit(X_train_filtered, y_train)\n        # predict = random_forest_model.predict(X_test_filtered)\n        # acurracy = accuracy_score(y_test, predict) * 100\n        acurracy = (cross_val_score(random_forest_model, X_test_filtered, y_test, cv=kf).mean()) * 100\n\n        # predicts["RandomForestClassifier"][f"ang_{angle}_dist_{distance}"] = predict\n        scores["RandomForestClassifier"][f"ang_{angle}_dist_{distance}"] = {"accuracy_score": acurracy} '

### Support Vector Machines

In [10]:
# Treina com todos os angulos e distancias
svm_model = svm.SVC(C=10, gamma='scale', kernel='linear')
svm_model.fit(X_train, y_train)
predict = svm_model.predict(X_test)
acurracy = accuracy_score(y_test, predict) * 100
#acurracy = (cross_val_score(svm_model, X_test, y_test, cv=kf).mean()) * 100

# predicts["SVM"] = {"all": predict}
scores["SVM"] = {"all": {"accuracy_score": acurracy}}

""" for distance in distances:
    for angle in angles:
        columns = [column for column in X_test.columns if f"ang_{angle}_dist_{distance}" in column]
        X_train_filtered = X_train.loc[::, columns]
        X_test_filtered = X_test.loc[::, columns]

        svm_model = svm.SVC(C=10, gamma='scale', kernel='linear')
        
        # svm_model.fit(X_train_filtered, y_train)
        # predict = svm_model.predict(X_test_filtered)
        # acurracy = accuracy_score(y_test, predict) * 100
        acurracy = (cross_val_score(svm_model, X_test_filtered, y_test, cv=kf).mean()) * 100

        # predicts["SVM"][f"ang_{angle}_dist_{distance}"] = predict
        scores["SVM"][f"ang_{angle}_dist_{distance}"] = {"accuracy_score": acurracy} """

' for distance in distances:\n    for angle in angles:\n        columns = [column for column in X_test.columns if f"ang_{angle}_dist_{distance}" in column]\n        X_train_filtered = X_train.loc[::, columns]\n        X_test_filtered = X_test.loc[::, columns]\n\n        svm_model = svm.SVC(C=10, gamma=\'scale\', kernel=\'linear\')\n        \n        # svm_model.fit(X_train_filtered, y_train)\n        # predict = svm_model.predict(X_test_filtered)\n        # acurracy = accuracy_score(y_test, predict) * 100\n        acurracy = (cross_val_score(svm_model, X_test_filtered, y_test, cv=kf).mean()) * 100\n\n        # predicts["SVM"][f"ang_{angle}_dist_{distance}"] = predict\n        scores["SVM"][f"ang_{angle}_dist_{distance}"] = {"accuracy_score": acurracy} '

### Boosted Tree Classifier

In [11]:
# Treina com todos os angulos e distancias
boosted_tree_model = GradientBoostingClassifier(learning_rate=0.1, max_depth=3, n_estimators=100)
boosted_tree_model.fit(X_train, y_train)
predict = boosted_tree_model.predict(X_test)
acurracy = accuracy_score(y_test, predict) * 100
#acurracy = (cross_val_score(boosted_tree_model, X_test, y_test, cv=kf).mean()) * 100

# predicts["GradientBoostingClassifier"] = {"all": predict}
scores["GradientBoostingClassifier"] = {"all": {"accuracy_score": acurracy}}

""" for distance in distances:
    for angle in angles:
        columns = [column for column in X_test.columns if f"ang_{angle}_dist_{distance}" in column]
        X_train_filtered = X_train.loc[::, columns]
        X_test_filtered = X_test.loc[::, columns]

        boosted_tree_model = GradientBoostingClassifier(learning_rate=0.1, max_depth=3, n_estimators=100)
        
        # boosted_tree_model.fit(X_train_filtered, y_train)
        # predict = boosted_tree_model.predict(X_test_filtered)
        # acurracy = accuracy_score(y_test, predict) * 100
        acurracy = (cross_val_score(boosted_tree_model, X_test_filtered, y_test, cv=kf).mean()) * 100
        
        # predicts["GradientBoostingClassifier"][f"ang_{angle}_dist_{distance}"] = predict
        scores["GradientBoostingClassifier"][f"ang_{angle}_dist_{distance}"] = {"accuracy_score": acurracy} """

' for distance in distances:\n    for angle in angles:\n        columns = [column for column in X_test.columns if f"ang_{angle}_dist_{distance}" in column]\n        X_train_filtered = X_train.loc[::, columns]\n        X_test_filtered = X_test.loc[::, columns]\n\n        boosted_tree_model = GradientBoostingClassifier(learning_rate=0.1, max_depth=3, n_estimators=100)\n        \n        # boosted_tree_model.fit(X_train_filtered, y_train)\n        # predict = boosted_tree_model.predict(X_test_filtered)\n        # acurracy = accuracy_score(y_test, predict) * 100\n        acurracy = (cross_val_score(boosted_tree_model, X_test_filtered, y_test, cv=kf).mean()) * 100\n        \n        # predicts["GradientBoostingClassifier"][f"ang_{angle}_dist_{distance}"] = predict\n        scores["GradientBoostingClassifier"][f"ang_{angle}_dist_{distance}"] = {"accuracy_score": acurracy} '

### Multilayer Perceptron

In [12]:
# Treina com todos os angulos e distancias
mlp = create_mlp_model(X_test.shape[1])
mlp.fit(X_train, y_train, epochs=100, batch_size=5, verbose=0)
predict = mlp.predict(X_test)

predict = (predict > threshold).astype(int) # Transformar as saídas em rótulos
""" acurracy = evaluate_mlp_model(create_mlp_model(X_test.shape[1]),
                              X_test, y_test, 5, 100, 5) """

# predicts["MultilayerPerceptron"] = {"all": predict}
scores["MultilayerPerceptron"] = {"all": {"accuracy_score": acurracy}}

""" for distance in distances:
    for angle in angles:
        columns = [column for column in X_test.columns if f"ang_{angle}_dist_{distance}" in column ]
        X_train_filtered = X_train.loc[::, columns]
        X_test_filtered = X_test.loc[::, columns]

        #mlp = create_model(X_test_filtered.shape[1])
        #mlp.fit(X_train_filtered, y_train, epochs=100, batch_size=5, verbose=0)
        #predict = mlp.predict(X_test_filtered)
        
        #predict = (predict > threshold).astype(int) # Transformar as saídas em rótulos
        #acurracy = accuracy_score(y_test, predict) * 100
        acurracy = evaluate_mlp_model(create_mlp_model(X_test_filtered.shape[1]),
                              X_test_filtered, y_test, 5, 100, 5)

        #predicts["MultilayerPerceptron"][f"ang_{angle}_dist_{distance}"] = predict
        scores["MultilayerPerceptron"][f"ang_{angle}_dist_{distance}"] = { "accuracy_score": acurracy } """



' for distance in distances:\n    for angle in angles:\n        columns = [column for column in X_test.columns if f"ang_{angle}_dist_{distance}" in column ]\n        X_train_filtered = X_train.loc[::, columns]\n        X_test_filtered = X_test.loc[::, columns]\n\n        #mlp = create_model(X_test_filtered.shape[1])\n        #mlp.fit(X_train_filtered, y_train, epochs=100, batch_size=5, verbose=0)\n        #predict = mlp.predict(X_test_filtered)\n        \n        #predict = (predict > threshold).astype(int) # Transformar as saídas em rótulos\n        #acurracy = accuracy_score(y_test, predict) * 100\n        acurracy = evaluate_mlp_model(create_mlp_model(X_test_filtered.shape[1]),\n                              X_test_filtered, y_test, 5, 100, 5)\n\n        #predicts["MultilayerPerceptron"][f"ang_{angle}_dist_{distance}"] = predict\n        scores["MultilayerPerceptron"][f"ang_{angle}_dist_{distance}"] = { "accuracy_score": acurracy } '

## Avaliação dos modelos

### Acurácia

In [13]:
scores_knn = pd.DataFrame(scores["KNeighborsClassifier"])
scores_random_forest = pd.DataFrame(scores["RandomForestClassifier"])
scores_svm = pd.DataFrame(scores["SVM"])
scores_boosted_tree = pd.DataFrame(scores["GradientBoostingClassifier"])
scores_perceptron = pd.DataFrame(scores["MultilayerPerceptron"])

merged_df = pd.concat([scores_knn, scores_random_forest, scores_svm, scores_boosted_tree,
                       scores_perceptron], axis=0).reset_index(drop=True)
merged_df['model'] = ['KNN', 'Random Forest', 'SVM', 'Boosted Tree', 'Multilayer Perceptron']

columns = merged_df.columns.tolist()
columns.remove('model')

merged_df = merged_df[['model', *columns]]
merged_df

Unnamed: 0,model,all
0,KNN,45.945946
1,Random Forest,59.459459
2,SVM,48.648649
3,Boosted Tree,62.162162
4,Multilayer Perceptron,62.162162


### Matriz de confusão

In [14]:
""" model = create_mlp_model(X_test_INBREAST.shape[1])

# Divisão em feature e labels
X_INBREAST, y_INBREAST = (breast_cancer_INBREAST.drop("pathology", axis=1), breast_cancer_INBREAST["pathology"])

# Balanceamento
nm = NearMiss(version=1)
X_INBREAST, y_INBREAST = nm.fit_resample(X_INBREAST, y_INBREAST)

# Divisão em treino e teste
X_train_INBREAST, X_test_INBREAST, y_train_INBREAST, y_test_INBREAST = train_test_split(X_INBREAST, y_INBREAST, test_size=0.30)

# Padronização
columns = X_INBREAST.columns
scaler = StandardScaler()
X_train_INBREAST = pd.DataFrame(scaler.fit_transform(X_train_INBREAST), columns=columns)
X_test_INBREAST = pd.DataFrame(scaler.transform(X_test_INBREAST), columns=columns)

# Tratamento dos rótulos categóricos
y_train_INBREAST = y_train_INBREAST.map({"BENIGN": 0, "MALIGNANT": 1})
y_test_INBREAST = y_test_INBREAST.map({"BENIGN": 0, "MALIGNANT": 1})

# Treinamento e previsão
model.fit(X_train_INBREAST, y_train_INBREAST, epochs=100, batch_size=5, verbose=0)
predict = model.predict(X_test_INBREAST)

true_positive, false_positive = [], []
true_negative, false_negative = [], []

for i in range(len(predict)):
    if predict[i] > 0.5:
        if y_test_INBREAST.iloc[i] == 1:
            true_positive.append(y_test_INBREAST.index[i])
        else:
            false_positive.append(y_test_INBREAST.index[i])
    else:
        if y_test_INBREAST.iloc[i] == 0:
            true_negative.append(y_test_INBREAST.index[i])
        else:
            false_negative.append(y_test_INBREAST.index[i]) """

' model = create_mlp_model(X_test_INBREAST.shape[1])\n\n# Divisão em feature e labels\nX_INBREAST, y_INBREAST = (breast_cancer_INBREAST.drop("pathology", axis=1), breast_cancer_INBREAST["pathology"])\n\n# Balanceamento\nnm = NearMiss(version=1)\nX_INBREAST, y_INBREAST = nm.fit_resample(X_INBREAST, y_INBREAST)\n\n# Divisão em treino e teste\nX_train_INBREAST, X_test_INBREAST, y_train_INBREAST, y_test_INBREAST = train_test_split(X_INBREAST, y_INBREAST, test_size=0.30)\n\n# Padronização\ncolumns = X_INBREAST.columns\nscaler = StandardScaler()\nX_train_INBREAST = pd.DataFrame(scaler.fit_transform(X_train_INBREAST), columns=columns)\nX_test_INBREAST = pd.DataFrame(scaler.transform(X_test_INBREAST), columns=columns)\n\n# Tratamento dos rótulos categóricos\ny_train_INBREAST = y_train_INBREAST.map({"BENIGN": 0, "MALIGNANT": 1})\ny_test_INBREAST = y_test_INBREAST.map({"BENIGN": 0, "MALIGNANT": 1})\n\n# Treinamento e previsão\nmodel.fit(X_train_INBREAST, y_train_INBREAST, epochs=100, batch_size=

In [15]:
""" metadata_INBREAST = load_json("extracted_metadata_INBREAST", "../../outputs/mamografia/extracted_metadata")
metadata_INBREAST

labels = ['Verdadeiro Positivo', 'Falso Positivo', 'Verdadeiro Negativo', 'Falso Negativo']
indexes = [(0, 0), (0, 1), (1, 0), (1, 1)]
iterables = [true_positive, false_positive, true_negative, false_negative]
print(len(true_positive), len(false_positive), len(true_negative), len(false_negative)) """

' metadata_INBREAST = load_json("extracted_metadata_INBREAST", "../../outputs/mamografia/extracted_metadata")\nmetadata_INBREAST\n\nlabels = [\'Verdadeiro Positivo\', \'Falso Positivo\', \'Verdadeiro Negativo\', \'Falso Negativo\']\nindexes = [(0, 0), (0, 1), (1, 0), (1, 1)]\niterables = [true_positive, false_positive, true_negative, false_negative]\nprint(len(true_positive), len(false_positive), len(true_negative), len(false_negative)) '

In [16]:
""" # Tamanho em polegadas (1 polegada = 2.54 cm)
largura_cm = 120
altura_cm = 90
dpi = 300  # Resolução em pontos por polegada (recomendada para impressão)

# Converta as dimensões de cm para polegadas
largura_in = largura_cm / 2.54
altura_in = altura_cm / 2.54

fig, ax = plt.subplots(2, 2, fig_size=(largura_in, altura_in), dpi=dpi)
plt.subplots_adjust(left=0.20, right=0.80, top=0.80, bottom=0.20, wspace=0.2, hspace=0.15)
id_iters = [7, 1, 0, 0]

for label, index, iterable, id_iter in zip(labels, indexes, iterables, id_iters):
    img_index = iterable[id_iter]
    ax[index[0], index[1]].imshow(dcmread(metadata_INBREAST[img_index]['metadata_csv']['image_path']).pixel_array,
                                  cmap=plt.cm.bone)
    ax[index[0], index[1]].set_title(f"{label}", family='Times New Roman', size=14)
    ax[index[0], index[1]].set_xticks([])
    ax[index[0], index[1]].axis('off') """

' # Tamanho em polegadas (1 polegada = 2.54 cm)\nlargura_cm = 120\naltura_cm = 90\ndpi = 300  # Resolução em pontos por polegada (recomendada para impressão)\n\n# Converta as dimensões de cm para polegadas\nlargura_in = largura_cm / 2.54\naltura_in = altura_cm / 2.54\n\nfig, ax = plt.subplots(2, 2, fig_size=(largura_in, altura_in), dpi=dpi)\nplt.subplots_adjust(left=0.20, right=0.80, top=0.80, bottom=0.20, wspace=0.2, hspace=0.15)\nid_iters = [7, 1, 0, 0]\n\nfor label, index, iterable, id_iter in zip(labels, indexes, iterables, id_iters):\n    img_index = iterable[id_iter]\n    ax[index[0], index[1]].imshow(dcmread(metadata_INBREAST[img_index][\'metadata_csv\'][\'image_path\']).pixel_array,\n                                  cmap=plt.cm.bone)\n    ax[index[0], index[1]].set_title(f"{label}", family=\'Times New Roman\', size=14)\n    ax[index[0], index[1]].set_xticks([])\n    ax[index[0], index[1]].axis(\'off\') '

In [17]:
""" metadata_INBREAST = load_json("extracted_metadata_INBREAST", "../../outputs/mamografia/extracted_metadata")
negative = [metadata for metadata in metadata_INBREAST if 
            metadata['metadata_csv']['bi-rads'] not in ['4c', '5', '6']]
positive = [metadata for metadata in metadata_INBREAST if 
            metadata['metadata_csv']['bi-rads'] in ['4c', '5', '6']]

img1 = dcmread(negative[72]['metadata_csv']['image_path']).pixel_array
img2 = dcmread(positive[4]['metadata_csv']['image_path']).pixel_array
img3 = dcmread(negative[10]['metadata_csv']['image_path']).pixel_array
img4 = dcmread(negative[94]['metadata_csv']['image_path']).pixel_array
 """

' metadata_INBREAST = load_json("extracted_metadata_INBREAST", "../../outputs/mamografia/extracted_metadata")\nnegative = [metadata for metadata in metadata_INBREAST if \n            metadata[\'metadata_csv\'][\'bi-rads\'] not in [\'4c\', \'5\', \'6\']]\npositive = [metadata for metadata in metadata_INBREAST if \n            metadata[\'metadata_csv\'][\'bi-rads\'] in [\'4c\', \'5\', \'6\']]\n\nimg1 = dcmread(negative[72][\'metadata_csv\'][\'image_path\']).pixel_array\nimg2 = dcmread(positive[4][\'metadata_csv\'][\'image_path\']).pixel_array\nimg3 = dcmread(negative[10][\'metadata_csv\'][\'image_path\']).pixel_array\nimg4 = dcmread(negative[94][\'metadata_csv\'][\'image_path\']).pixel_array\n '

In [18]:
""" 
labels = ['Verdadeiro Positivo', 'Falso Positivo', 'Verdadeiro Negativo', 'Falso Negativo']
indexes = [(0, 0), (0, 1), (1, 0), (1, 1)]
imgs = [img1, img2, img3, img4]
ids = [72, 4, 10, 94]

# Tamanho em polegadas (1 polegada = 2.54 cm)
largura_cm = 120
altura_cm = 90
dpi = 300  # Resolução em pontos por polegada (recomendada para impressão)

# Converta as dimensões de cm para polegadas
largura_in = largura_cm / 2.54
altura_in = altura_cm / 2.54

fig, ax = plt.subplots(2, 2, figsize=(largura_in, altura_in), dpi=dpi)
#plt.subplots_adjust(left=0.20, right=0.80, top=0.80, bottom=0.20, wspace=0.2, hspace=0.15)
plt.subplots_adjust(wspace=-0.4, hspace=0.15, right=0.8, left=0.2, top=0.8, bottom=0.2)

for label, index, img in zip(labels, indexes, imgs):
    ax[index[0], index[1]].imshow(img, cmap=plt.cm.bone)
    ax[index[0], index[1]].set_title(f"{label}", family='Times New Roman', fontsize=60, pad=20)
    ax[index[0], index[1]].set_xticks([])
    ax[index[0], index[1]].axis('off')
 """

' \nlabels = [\'Verdadeiro Positivo\', \'Falso Positivo\', \'Verdadeiro Negativo\', \'Falso Negativo\']\nindexes = [(0, 0), (0, 1), (1, 0), (1, 1)]\nimgs = [img1, img2, img3, img4]\nids = [72, 4, 10, 94]\n\n# Tamanho em polegadas (1 polegada = 2.54 cm)\nlargura_cm = 120\naltura_cm = 90\ndpi = 300  # Resolução em pontos por polegada (recomendada para impressão)\n\n# Converta as dimensões de cm para polegadas\nlargura_in = largura_cm / 2.54\naltura_in = altura_cm / 2.54\n\nfig, ax = plt.subplots(2, 2, figsize=(largura_in, altura_in), dpi=dpi)\n#plt.subplots_adjust(left=0.20, right=0.80, top=0.80, bottom=0.20, wspace=0.2, hspace=0.15)\nplt.subplots_adjust(wspace=-0.4, hspace=0.15, right=0.8, left=0.2, top=0.8, bottom=0.2)\n\nfor label, index, img in zip(labels, indexes, imgs):\n    ax[index[0], index[1]].imshow(img, cmap=plt.cm.bone)\n    ax[index[0], index[1]].set_title(f"{label}", family=\'Times New Roman\', fontsize=60, pad=20)\n    ax[index[0], index[1]].set_xticks([])\n    ax[index[0