### <ins> Download and untar dataset from url </ins>

In [None]:
import pathlib
import tensorflow as tf
#import tensorflow_datasets as tfds

# Remote location of the flower photos archive (.tgz).
dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"

# Fetch and untar the archive with Keras' get_file, then wrap the returned
# location in a pathlib.Path so path operations (e.g. glob) are available.
data_dir = pathlib.Path(
    tf.keras.utils.get_file(
        fname='flower_photos',
        origin=dataset_url,
        untar=True,
    )
)

### <ins> Number of images in directory </ins>

In [None]:
# Count the .jpg files located exactly one directory level below data_dir,
# without materialising the list of paths first.
image_count = sum(1 for _ in data_dir.glob('*/*.jpg'))
print(image_count)

### <ins> View an image from directory </ins>

In [None]:
# No import of PIL is visible before this cell, so import it here; otherwise
# the PIL.Image.open call below raises NameError on a fresh kernel.
import PIL.Image

# glob() gives no ordering guarantee; sorting makes roses[0] refer to the
# same file on every run.
roses = sorted(data_dir.glob('roses/*'))
# Open one file to view it (the cell's last expression is what gets displayed).
PIL.Image.open(str(roses[0]))
#PIL.Image.open(str(roses[1])) etc..

### <ins> Load data from disk </ins>

#### (1) tf.keras.utils.image_dataset_from_directory
(https://www.tensorflow.org/api_docs/python/tf/keras/utils/image_dataset_from_directory)

Generates a tf.data.Dataset from image files in a directory.

**Comments:**
- should specify `image_size`: Size to resize images to after they are read from disk. Defaults to (256, 256). Since the pipeline processes batches of images that must all have the same size, this must be provided.
- `interpolation`: String, the interpolation method used when resizing images. Defaults to bilinear. Supports bilinear, nearest, bicubic, area, lanczos3, lanczos5, gaussian, mitchellcubic.
- `crop_to_aspect_ratio`: If True, resize the images without aspect ratio distortion. When the original aspect ratio differs from the target aspect ratio, the output image will be cropped so as to return the largest possible window in the image (of size image_size) that matches the target aspect ratio. By default (crop_to_aspect_ratio=False), aspect ratio may not be preserved.
- `color_mode`: One of "grayscale", "rgb", "rgba". Default: "rgb". Whether the images will be converted to have 1, 3, or 4 channels.
- `label_mode`:
    - 'int' means that the labels are encoded as integers (e.g. for `sparse_categorical_crossentropy` loss).
    - 'categorical' means that the labels are encoded as a categorical vector (e.g. for `categorical_crossentropy` loss).
    - 'binary' means that the labels (there can be only 2) are encoded as float32 scalars with values 0 or 1 (e.g. for `binary_crossentropy`).
    - None (no labels).
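- `validation_split`: Optional float between 0 and 1, the fraction of the data to reserve for validation. We can use this to create a validation set; since the split is made by this function, the original folder should contain all images (training and validation together).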
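- `subset`: One of "training" or "validation". Only used if `validation_split` is set. It controls which part of the split this call returns, so the images do not need to be separated into train/validation folders: call the function twice on the same directory with the same `validation_split` and the same `seed` (or with `shuffle=False`), once with `subset="training"` and once with `subset="validation"`, to obtain two non-overlapping datasets.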
- `follow_links`: Whether to visit subdirectories pointed to by symlinks. Defaults to False.

Returns: A tf.data.Dataset object. If label_mode is None, it yields float32 tensors of shape (batch_size, image_size[0], image_size[1], num_channels). Otherwise, it yields a tuple (images, labels), where images has shape (batch_size, image_size[0], image_size[1], num_channels), and labels follows the format below.

- if label_mode is int, the labels are an int32 tensor of shape (batch_size,).
- if label_mode is binary, the labels are a float32 tensor of 1s and 0s of shape (batch_size, 1).
- if label_mode is categorical, the labels are a float32 tensor of shape (batch_size, num_classes), representing a one-hot encoding of the class index.

### <ins> Apply transformations to the dataset </ins>

In [None]:
import tensorflow as tf

# from keras.engine import base_layer
# from keras.engine import base_preprocessing_layer

from tensorflow.python.keras.engine import base_layer
from tensorflow.python.keras.engine import base_preprocessing_layer

In [None]:
# Take Rescaling from the public Keras API: `tensorflow.python.*` is a private
# namespace and is not guaranteed to exist or stay stable across releases.
# The `Rescaling` name stays bound in case another cell refers to it.
Rescaling = tf.keras.layers.Rescaling

# Multiply every input value by 1/255 (maps pixel values in [0, 255] to [0, 1]).
normalization_layer = Rescaling(1./255) # [0,1] normalization
#normalization_layer = Rescaling(1./127.5, offset=-1)  # [-1,1] normalization

### dataset.map(map_func)
This transformation applies map_func to each element of this dataset, and returns a new dataset containing the transformed elements, in the same order as they appeared in the input. map_func can be used to change both the values and the structure of a dataset's elements.

In [None]:
# normalization_layer = tf.keras.layers.Rescaling(1./255) #[0,1] normalization
# # tf.keras.layers.Rescaling(1./127.5, offset=-1)  # [-1,1] normalization

# normalized_ds_tr = train_ds.map(lambda x, y: (normalization_layer(x), y))
# normalized_ds_val = val_ds.map(lambda x, y: (normalization_layer(x), y))

In [None]:
# ## see results of normalization
# image_batch, labels_batch = next(iter(normalized_ds_tr))
# first_image = image_batch[0]

### Convert dataset into numpy iterator

In [None]:
# Small in-memory dataset used to demonstrate the conversion.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])

# as_numpy_iterator() returns an iterator object; consuming it with list()
# makes the cell display the elements themselves instead of the iterator's repr.
list(dataset.as_numpy_iterator())

### <ins> Configure dataset for performance </ins>

In [None]:
<ins> </ins>