In [1]:
import os
import numpy as np

import tensorflow as tf
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Diretório com suas imagens
data_dir = 'C:/Users/estel/Documents/Python_Codes/breast_cancer/preprocessed/images/'

# Lista para armazenar imagens e rótulos
images = []
labels = []

# Loop sobre os arquivos no diretório
for filename in os.listdir(data_dir):
    if filename.endswith('.png'):  # Supondo que suas imagens tenham extensão .jpg
        img = load_img(os.path.join(data_dir, filename), target_size=(300, 300))
        img_array = img_to_array(img)
        images.append(img_array)
        
        # Extrair rótulo do nome do arquivo
        label = filename.split(' ')[0]  # Pega o rótulo antes do espaço em branco
        labels.append(label)

In [3]:
# Codificar rótulos em números
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Converter para array numpy
images = np.array(images)
labels = to_categorical(labels)

In [4]:
labels.shape, images.shape

((780, 3), (780, 300, 300, 3))

In [5]:
# Dividir os dados em conjuntos de treinamento e validação
X_train, X_val, y_train, y_val = train_test_split(images, labels, test_size=0.2, shuffle=True, random_state=42)

In [6]:
# Calcular pesos das classes baseados na distribuição dos dados
class_weights = {}
total_samples = len(y_train)
num_classes = len(np.unique(np.argmax(y_train, axis=1)))
for i in range(num_classes):
    class_count = np.sum(np.argmax(y_train, axis=1) == i)
    class_weights[i] = total_samples / (num_classes * class_count)

In [7]:
# Carregar o modelo VGG-16 pré-treinado
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(300, 300, 3))

# Congelar as camadas do modelo base
for layer in base_model.layers:
    layer.trainable = False

# Adicionar camadas personalizadas
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))  # 3 classes: benign, malign, normal

# Compilar o modelo
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

In [8]:
model.summary()

In [9]:
# Treinar o modelo
model.fit(X_train, y_train, epochs=15, validation_data=(X_val, y_val), class_weight=class_weights)

Epoch 1/15
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m687s[0m 20s/step - accuracy: 0.4553 - loss: 29.2488 - val_accuracy: 0.6859 - val_loss: 6.5255
Epoch 2/15
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m314s[0m 16s/step - accuracy: 0.8084 - loss: 4.6369 - val_accuracy: 0.7500 - val_loss: 5.3321
Epoch 3/15
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m304s[0m 15s/step - accuracy: 0.9058 - loss: 1.5604 - val_accuracy: 0.7436 - val_loss: 3.5315
Epoch 4/15
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m300s[0m 15s/step - accuracy: 0.9544 - loss: 0.2522 - val_accuracy: 0.7756 - val_loss: 2.9352
Epoch 5/15
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m296s[0m 15s/step - accuracy: 0.9573 - loss: 0.3101 - val_accuracy: 0.7628 - val_loss: 2.6722
Epoch 6/15
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m294s[0m 15s/step - accuracy: 0.9451 - loss: 0.3905 - val_accuracy: 0.7949 - val_loss: 2.6066
Epoch 7/15
[1m20/20[0m [

<keras.src.callbacks.history.History at 0x22ef03e32e0>

In [10]:
# Avaliar o desempenho do modelo
loss, accuracy = model.evaluate(X_val, y_val)
print("Loss:", loss)
print("Accuracy:", accuracy)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 12s/step - accuracy: 0.7819 - loss: 2.2360
Loss: 2.061725616455078
Accuracy: 0.8012820482254028


In [11]:
# Fazer previsões no conjunto de validação
y_pred = model.predict(X_val)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_val, axis=1)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 12s/step


In [None]:
# Calcular a matriz de confusão
conf_matrix = confusion_matrix(y_true, y_pred_classes)

# Plotar a matriz de confusão
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', 
            xticklabels=label_encoder.classes_, 
            yticklabels=label_encoder.classes_)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()