# Iniciando a criação do meu primeiro modelo de machine learning
### Projeto de Iniciação Tecnológica
* Objetivo: Criar um modelo simples, verificar os resultados e ir complexando.
* Orientandos: Viviane Botelho, Thatiane Pianoschi, Bernardo Cecchetto

In [1]:
import tensorflow as tf
import keras as ke
import numpy as np
from matplotlib import pyplot as plt
import pathlib 
import os

localImagens = pathlib.Path("/mnt/d/ImagensDatasetFredrik/imagensDataset/")

2023-03-21 20:40:10.212563: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Etapa 1: Criar um dataset a partir do diretório, redimensionar as imagens e separar em classes de positivo e negativo.
* Classe 0: Mamografias com diagnóstico NEGATIVO para a neoplasia
* Classe 1: Mamografias com diagnóstico POSITIVO para a neoplasia

In [2]:
dataset = tf.keras.utils.image_dataset_from_directory(localImagens, 
                                                      color_mode='grayscale', # colocamos em escala de cinza, assim ficamos com apenas 1 canal 
                                                      shuffle=True,
                                                      ) # embaralhar para dispersar os dados, possivelmente evitando bias

Found 2015 files belonging to 2 classes.


2023-03-21 19:20:58.961798: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-21 19:20:58.962511: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [3]:
# Classes
print(dataset.class_names)

['classe0', 'classe1']


In [2]:
# Quantidade de Imagens
quantImagens = len(list(pathlib.Path(localImagens).glob('*/*.png')))

In [3]:
# Outra forma de criar o conjunto de dados
listDataset = tf.data.Dataset.list_files(str(localImagens/'*/*'), shuffle=False)
listDataset = listDataset.shuffle(quantImagens, reshuffle_each_iteration=False)

2023-03-21 20:40:13.943725: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-21 20:40:13.944820: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [4]:
print("Quantidade de imagens: {0}".format(tf.data.experimental.cardinality(listDataset).numpy()))

Quantidade de imagens: 2015


In [5]:
class_names = np.array(sorted([item.name for item in localImagens.glob('*') if item.name != "LICENSE.txt"]))
print(class_names)

# https://www.tensorflow.org/tutorials/load_data/images?hl=pt-br 
# TESTANDO DO TENSOR FLOW
def get_label(file_path):
  # Convert the path to a list of path components
  parts = tf.strings.split(file_path, os.path.sep)
  # The second to last is the class-directory
  one_hot = parts[-2] == class_names
  # Integer encode the label
  return tf.argmax(one_hot)

def decode_img(img):
  # Convert the compressed string to a 3D uint8 tensor
  img = tf.io.decode_jpeg(img, channels=3)
  # Resize the image to the desired size
  return tf.image.resize(img, [256, 256])

def process_path(file_path):
  label = get_label(file_path)
  # Load the raw data from the file as a string
  img = tf.io.read_file(file_path)
  img = decode_img(img)
  return img, label

['classe0' 'classe1']


### Definindo tamanho dos conjuntos de treino e validação

In [6]:
valSize = int(quantImagens * 0.3) # separando 20% para validação
testSize = int(quantImagens * 0.1) # separando 10% para teste
trainDataset = listDataset.skip(valSize+testSize)
valDataset = listDataset.take(valSize)
testDataset = listDataset.take(testSize)

In [7]:
# Verificando
print("Treino: {0}".format(str(tf.data.experimental.cardinality(trainDataset).numpy())))
print("Validacao: {0}".format(str(tf.data.experimental.cardinality(valDataset).numpy())))
print("Teste: {0}".format(str(tf.data.experimental.cardinality(testDataset).numpy())))

Treino: 1210
Validacao: 604
Teste: 201


In [8]:
# Cria o shape (imagem, label)
AUTOTUNE = tf.data.AUTOTUNE # PROCESSAR/CARREGAR AS IMAGENS EM PARALELO
trainDataset = trainDataset.map(process_path, num_parallel_calls=AUTOTUNE)
valDataset = valDataset.map(process_path, num_parallel_calls=AUTOTUNE)
testDataset = testDataset.map(process_path, num_parallel_calls=AUTOTUNE)

In [9]:
for image, label in trainDataset.take(1):
    takeShape = image.numpy().shape
    print(takeShape)

(256, 256, 3)


### Criando Modelo

In [10]:
model = ke.Sequential()
model.add(ke.layers.Input(takeShape))
model.add(ke.layers.Dense(64, activation='relu'))
model.add(ke.layers.Dense(1, activation='sigmoid')) # sigmoid para problemas de classificacao binaria
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256, 256, 64)      256       
                                                                 
 dense_1 (Dense)             (None, 256, 256, 1)       65        
                                                                 
Total params: 321
Trainable params: 321
Non-trainable params: 0
_________________________________________________________________


In [32]:
# Compilando modelo
model.compile(loss='binary_crossentropy', metrics=['accuracy'])

In [34]:
# Treinando modelo
history = model.fit(trainDataset,
                  batch_size=100,
                  epochs=20,
                  validation_data=valDataset);

Epoch 1/20


ValueError: in user code:

    File "/home/lukasmachado/miniconda3/envs/projITI/lib/python3.10/site-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/home/lukasmachado/miniconda3/envs/projITI/lib/python3.10/site-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/lukasmachado/miniconda3/envs/projITI/lib/python3.10/site-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/home/lukasmachado/miniconda3/envs/projITI/lib/python3.10/site-packages/keras/engine/training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "/home/lukasmachado/miniconda3/envs/projITI/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/home/lukasmachado/miniconda3/envs/projITI/lib/python3.10/site-packages/keras/engine/input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_1" is incompatible with the layer: expected shape=(None, 256, 256), found shape=(256, 256, 3)
