# Análise do dataset

## Carregamento do dataset

In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, metrics

import numpy as np

import os
import pathlib
import matplotlib.pyplot as plt

# Notebook-wide image and batching parameters.
NCLASSES = 2       # number of classes (not referenced in the code visible here)
HEIGHT = 50        # target image height, used by decode_img when resizing
WIDTH = 50         # target image width, used by decode_img when resizing
NUM_CHANNELS = 3   # colour channels per image; presumably mirrors channels=3 in decode_img -- TODO confirm
BATCH_SIZE = 32    # batch size; show_batch uses it as the number of images to plot

def get_label(file_path):
  """Return a boolean vector marking which entry of `classNames` matches the file.

  The class is taken from the name of the directory that directly contains
  the file, i.e. the second-to-last component of `file_path`.
  """
  path_components = tf.strings.split(file_path, sep=os.path.sep)
  class_dir = path_components[-2]
  # Element-wise comparison against every known class name.
  return class_dir == classNames

def decode_img(img):
  """Decode PNG bytes into a float32 image resized to HEIGHT x WIDTH.

  Args:
    img: scalar string tensor holding the encoded PNG file contents.

  Returns:
    A float32 tensor of shape (HEIGHT, WIDTH, NUM_CHANNELS) with values
    scaled to the [0, 1] range.
  """
  # Convert the compressed string to a 3D uint8 tensor.
  img = tf.image.decode_png(img, channels=NUM_CHANNELS)
  # `convert_image_dtype` rescales integer pixels to floats in [0, 1].
  img = tf.image.convert_image_dtype(img, tf.float32)
  # `tf.image.resize` expects the size as [new_height, new_width]; passing
  # width first only worked because both happen to be equal.
  return tf.image.resize(img, [HEIGHT, WIDTH])

def get_bytes_and_label(file_path):
  """Map an image file path to an `(image, label)` pair.

  The file is read as a raw string and decoded by `decode_img`; the label
  is derived from the path by `get_label`.
  """
  raw_contents = tf.io.read_file(file_path)
  return decode_img(raw_contents), get_label(file_path)

# Root folder of the dataset; each sub-directory holds the images of one class.
data_dir = pathlib.Path('C:/Users/paulo/OneDrive/Ambiente de Trabalho/Escola/Apendizagem automatica 2/Trabalho/AA2/input/cell_images')

print(data_dir)

# Class names are the sub-directory names. os.listdir returns every entry in
# arbitrary order, so keep directories only and sort them: this makes the
# position of each class in the label vector deterministic across runs.
classNames = np.array(sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))))

print ("Classes: "+str(classNames))

AUTOTUNE = tf.data.experimental.AUTOTUNE

# Build the glob from data_dir so the dataset location is defined only once.
listset = tf.data.Dataset.list_files(data_dir.as_posix() + "/*/*.png")
dataset = listset.map(get_bytes_and_label, num_parallel_calls = AUTOTUNE)



C:\Users\paulo\OneDrive\Ambiente de Trabalho\Escola\Apendizagem automatica 2\Trabalho\AA2\input\cell_images
Classes: ['Parasitized' 'Uninfected']


In [None]:
# Inspect a single example to confirm the shape of the decoded images.
for image, label in dataset.take(1):
  print("Image shape: ", image.numpy().shape)

# Count the examples on the file-path dataset instead of the mapped one:
# `dataset` is a one-to-one map of `listset`, so the count is the same, but
# no image has to be read and decoded and no throw-away list is built.
# An empty dataset yields 0 instead of raising IndexError.
dataset_length = sum(1 for _ in listset)
print("Total images in dataset: ",dataset_length)

import os.path
pathInfected = 'C:/Users/paulo/OneDrive/Ambiente de Trabalho/Escola/Apendizagem automatica 2/Trabalho/AA2/input/cell_images/Parasitized'
pathUninfected = 'C:/Users/paulo/OneDrive/Ambiente de Trabalho/Escola/Apendizagem automatica 2/Trabalho/AA2/input/cell_images/Uninfected'

def _count_files(directory):
  """Return the number of regular files directly inside `directory`."""
  return sum(
      1 for name in os.listdir(directory)
      if os.path.isfile(os.path.join(directory, name)))

num_files = _count_files(pathInfected)

print("Infected Images: "+str(num_files))

num_files = _count_files(pathUninfected)

print("Uninfected Images: "+str(num_files))

Image shape:  (50, 50, 3)
Total images in dataset:  27558
Infected Images: 13779


## Mostrar uma amostra de imagens

In [None]:
# Input pipeline: cache the decoded images, shuffle over the whole dataset,
# group into batches and repeat indefinitely.
dataset = dataset.cache()
dataset = dataset.shuffle(buffer_size = dataset_length)
# Use the shared BATCH_SIZE constant rather than a separate hard-coded value,
# so the pipeline and the code that consumes its batches agree.
dataset = dataset.batch(batch_size=BATCH_SIZE)
dataset = dataset.repeat()
# Prefetch is the last stage so upcoming batches are prepared while the
# current one is being consumed.
dataset = dataset.prefetch(buffer_size=AUTOTUNE)

def show_batch(image_batch, label_batch):
  """Plot up to BATCH_SIZE images of a batch in a grid, titled by class name.

  Args:
    image_batch: indexable batch of images; each item is passed to
      `plt.imshow`.
    label_batch: indexable batch of per-class label vectors; the entry equal
      to 1 (True) selects the title from `classNames`.
  """
  columns = 6
  # Never index past the end of a batch shorter than BATCH_SIZE.
  count = min(BATCH_SIZE, len(image_batch))
  if count == 0:
      return
  # Integer ceiling division: plt.subplot requires an integer row count,
  # which the previous float expression did not provide.
  rows = (count + columns - 1) // columns
  plt.figure(figsize=(10, 2 * rows))
  for n in range(count):
      plt.subplot(rows, columns, n+1)
      plt.imshow(image_batch[n])
      plt.title(classNames[label_batch[n]==1][0])
      plt.axis('off')
        
print("Exemplo de imagens")
# Pull one batch from the pipeline and plot it; labels are converted to a
# NumPy array before being handed to show_batch.
batch_iterator = iter(dataset)
image_batch, label_batch = next(batch_iterator)
show_batch(image_batch, label_batch.numpy())
