In [None]:
1. Basic mechanics
2. Reading input data
    1. Numpy arrays
    2. Python generators
    3. TFRecord 
    4. Text
    5. CSV 
    6. Sets of files
3. Batching dataset elements
    1. Simple batching
    2. Batching tensors with padding
4. Training workflows
    1. Processing multiple epochs
    2. Randomly shuffling input data
5. Preprocessing data
    1. Decoding image data and resizing it
    2. Applying arbitrary Python logic
    3. Parsing tf.Example protocol buffer messages
    4. Time series windowing
    5. Resampling
6. Iterator Checkpointing
7. Using tf.data with tf.keras

**Importing the libraries**

In [None]:
import tensorflow as tf

In [None]:
# Standard library
import os
import pathlib

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Print NumPy floating-point values with 4 digits of precision so that
# printed dataset contents stay compact.
np.set_printoptions(precision=4)

## 1. Basic mechanics

In [None]:
# Slice a Python list into a dataset: each of the six integers becomes one element.
dataset = tf.data.Dataset.from_tensor_slices([8, 3, 0, 8, 2, 1])
# Printing the Dataset object itself shows its repr, not the elements it holds.
print(dataset)

In [None]:
# A Dataset is a Python iterable; iterating it directly yields one tensor per element.
for elem in dataset:
    print(elem)
print('******************************')
# Second pass over the same dataset, this time converting each tensor to NumPy.
for elem in dataset:
    print(elem.numpy())

In [None]:
# Create an explicit Python iterator over the dataset ...
it = iter(dataset)

# ... and pull only the next (here: first) element from it, converted to NumPy.
print(next(it).numpy())

## 1.1 Dataset Structure

In [None]:
# The simplest way to create a dataset is to create it from a Python `list`:
# each item of the list becomes one dataset element.

dataset = tf.data.Dataset.from_tensor_slices([1,2,3])
print('type of dataset :', type(dataset))
# Iterating the dataset directly yields one element at a time.
for element in dataset:
    print(element)
# as_numpy_iterator() yields the same elements converted to NumPy values.
print(list(dataset.as_numpy_iterator()))

In [None]:
# A nested list is sliced along its first axis: each inner list
# ([1, 2, 3], then [4, 5, 6]) becomes one dataset element.

dataset = tf.data.Dataset.from_tensor_slices([[1,2,3],[4,5,6]])
for element in dataset:
    print(element)
# Same elements, converted to NumPy arrays and collected into a list.
print(list(dataset.as_numpy_iterator()))

In [None]:
# We can also create a dataset from a dictionary: the value lists are sliced
# together, so each element is a dict with the same keys ("a" and "b") holding
# one value per key.

dataset = tf.data.Dataset.from_tensor_slices({"a": [1, 2], "b": [3, 4]})
for element in dataset:
    print(element)
# Same elements, with the values converted to NumPy.
print(list(dataset.as_numpy_iterator()))

In [None]:
# Two tensors can be combined into one Dataset object.

features = tf.constant([[1, 3], [2, 1], [3, 3]]) # ==> 3x2 tensor
labels = tf.constant(['A', 'B', 'A']) # ==> 1-D tensor of 3 strings, shape (3,)

# Both the features and the labels tensors can be converted
# to a Dataset object separately and combined after.

features_dataset = tf.data.Dataset.from_tensor_slices(features)
labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
# zip pairs the i-th feature row with the i-th label as a (features, label) tuple.
dataset = tf.data.Dataset.zip((features_dataset, labels_dataset))

print(list(dataset.as_numpy_iterator()))

In [None]:
# A batched feature and label set can be converted to a Dataset in similar
# fashion: pass both tensors as one tuple and they are sliced together along
# their first axis.
batched_features = tf.constant(
    [
        [[1, 3], [2, 3]],
        [[2, 1], [1, 2]],
        [[3, 3], [3, 2]],
    ],
    shape=(3, 2, 2),
)
batched_labels = tf.constant(
    [
        ['A', 'A'],
        ['B', 'B'],
        ['A', 'B'],
    ],
    shape=(3, 2, 1),  # the 3x2 nested list is reshaped to (3, 2, 1)
)
dataset = tf.data.Dataset.from_tensor_slices((batched_features, batched_labels))
# Show only the first (features, labels) pair.
for element in dataset.take(1).as_numpy_iterator():
    print(element)

In [None]:
# Build a dataset of 4 elements, each a length-10 float vector of uniform
# random values in [0, 1).  A stateless op with a fixed seed is used instead
# of the unseeded tf.random.uniform so the values shown further down are
# reproducible across kernel restarts and across re-runs of this cell.
dataset1 = tf.data.Dataset.from_tensor_slices(
    tf.random.stateless_uniform([4, 10], seed=(0, 42))
)

# element_spec describes the type and shape of a single dataset element.
dataset1.element_spec

In [None]:
# A tuple of two equal-length tensors: each dataset element is a pair of
# scalars, one taken from each tensor.
dataset2 = tf.data.Dataset.from_tensor_slices(
    (
        tf.constant([1, 2, 3]),
        tf.constant([1, 2, 3]),
    )
)
dataset2.element_spec

In [None]:
# Zip the two datasets so each element is a (dataset1_element, dataset2_element) pair.
# NOTE(review): dataset1 is built from 4 rows but dataset2 from 3 values; zip
# is documented to stop at the shortest input, so the fourth row of dataset1
# is presumably dropped here -- confirm this is intended.
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))

# The spec is nested the same way as the zipped inputs.
dataset3.element_spec

In [None]:
# Materialize every zipped element as NumPy values to inspect the nested structure.
list(dataset3.as_numpy_iterator())