<a href="https://colab.research.google.com/github/ryzhokhina/Tensorflow-Dev-Exam-Preparation/blob/main/Book/tensorflow_data_api.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds

In [2]:
# Show the TensorFlow version in use, so the outputs below can be tied to it.
tf.__version__

'2.12.0'

1.   from_tensor_slices(<tensor>)
2.   repeat()
3.   batch()
4.   unbatch()
5.   map()
6.   apply()
7.   take()

In [3]:
# Build a tf.data.Dataset whose elements are the individual entries of X.

X = tf.range(10)
dataset = tf.data.Dataset.from_tensor_slices(X)
dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [4]:
# Iterating over a dataset yields its elements one at a time as tensors.
for item in dataset:
  print(item)

tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype=int32)
tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(9, shape=(), dtype=int32)


In [5]:
# Chaining transformations: each method returns a new dataset, so calls can
# be strung together into a single pipeline.
dataset1 = (
    dataset
    .repeat(count=3)       # go through the source elements three times
    .batch(batch_size=7)   # group into batches of 7; the last one may be shorter
)
for item in dataset1:
  print(item)

tf.Tensor([0 1 2 3 4 5 6], shape=(7,), dtype=int32)
tf.Tensor([7 8 9 0 1 2 3], shape=(7,), dtype=int32)
tf.Tensor([4 5 6 7 8 9 0], shape=(7,), dtype=int32)
tf.Tensor([1 2 3 4 5 6 7], shape=(7,), dtype=int32)
tf.Tensor([8 9], shape=(2,), dtype=int32)


In [6]:
# drop_remainder=True discards a final batch that has fewer than 7 elements,
# so every batch in dataset2 has the same shape.
dataset2 = (
    dataset
    .repeat(count=3)
    .batch(batch_size=7, drop_remainder=True)
)
for item in dataset2:
  print(item)

tf.Tensor([0 1 2 3 4 5 6], shape=(7,), dtype=int32)
tf.Tensor([7 8 9 0 1 2 3], shape=(7,), dtype=int32)
tf.Tensor([4 5 6 7 8 9 0], shape=(7,), dtype=int32)
tf.Tensor([1 2 3 4 5 6 7], shape=(7,), dtype=int32)


**Dataset methods do not modify datasets; they create new ones,
so make sure to keep a reference to the new dataset
(e.g., `dataset = ...`), or else nothing will happen.**

**map() applies a transformation to each item**

In [7]:
# map() runs the given function on every element of the dataset (here, on
# every batch of dataset1). num_parallel_calls is used to speed up the
# transformation by processing several elements in parallel.
for item in dataset1.map(
    lambda batch: batch * 2,
    num_parallel_calls=2,
):
  print(item)


tf.Tensor([ 0  2  4  6  8 10 12], shape=(7,), dtype=int32)
tf.Tensor([14 16 18  0  2  4  6], shape=(7,), dtype=int32)
tf.Tensor([ 8 10 12 14 16 18  0], shape=(7,), dtype=int32)
tf.Tensor([ 2  4  6  8 10 12 14], shape=(7,), dtype=int32)
tf.Tensor([16 18], shape=(2,), dtype=int32)


**apply() method applies a transformation to the dataset as a whole**:


The argument of apply() is a function that takes a Dataset and returns a Dataset, whereas the argument of map() is a function that takes one element and returns one transformed element.

In [8]:
# apply() hands the whole dataset to a Dataset -> Dataset function.
# tf.data.experimental.unbatch() is deprecated (TensorFlow itself says to use
# tf.data.Dataset.unbatch()), so the applied function calls the built-in
# unbatch() method, which splits every batch back into individual elements.
for item in dataset1.apply(lambda ds: ds.unbatch()):
  print(item)

Instructions for updating:
Use `tf.data.Dataset.unbatch()`.


tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype=int32)
tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(9, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype=int32)
tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(9, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype

In [9]:
# NOTE: this rebinds `dataset`, which earlier cells built from tf.range(10);
# datasets already derived from the old value (dataset1, dataset2) are unaffected.
dataset = tf.data.Dataset.range(100)
def dataset_fn(ds):
  """Return the result of filtering `ds` down to elements strictly below 5.

  Takes a whole dataset and returns whatever `ds.filter(...)` produces,
  which is the kind of function that `Dataset.apply()` accepts.
  """
  def below_five(element):
    return element < 5

  return ds.filter(below_five)

# Run the whole-dataset transformation defined by dataset_fn.
dataset = dataset.apply(dataset_fn)
# This call must stay the cell's last expression: a hand-typed list literal
# after it would be displayed instead of the real pipeline result.
list(dataset.as_numpy_iterator())

[0, 1, 2, 3, 4]

In [10]:
# Expected failure, shown on purpose: dataset_fn filters with `x < 5`, but
# dataset1 yields batches, so the predicate produces a vector of booleans
# rather than the single boolean filter() requires. The error is caught and
# printed so that running the notebook top to bottom does not stop here.
try:
  dataset1.apply(dataset_fn)
except ValueError as err:
  print(f"ValueError: {err}")

ValueError: ignored

In [11]:
# Flatten dataset2's batches back into individual elements. Dataset.unbatch()
# replaces the deprecated tf.data.experimental.unbatch() transformation.
dataset5 = dataset2.unbatch()

In [None]:
# Materialise dataset5 as a Python list of NumPy values for inspection.
list(dataset5.as_numpy_iterator())

In [13]:
# Filtering works here because dataset5 yields individual (unbatched)
# elements, so the predicate returns one boolean per element.
dataset6 = dataset5.filter(lambda element: element < 5)
list(dataset6.as_numpy_iterator())

[0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4]

In [14]:
# take(5) limits iteration to at most the first five elements of dataset6.
for item in dataset6.take(5):
  print(item)

tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)


1.   shuffle()

In [18]:
# Shuffle 0..9 repeated three times with a buffer of 8 elements and a fixed
# seed, then group the shuffled elements into batches of 5.
dataset7 = (
    tf.data.Dataset.range(10)
    .repeat(3)
    .shuffle(buffer_size=8, seed=42)
    .batch(5)
)
for item in dataset7:
  print(item)

tf.Tensor([1 8 6 3 5], shape=(5,), dtype=int64)
tf.Tensor([9 2 4 0 2], shape=(5,), dtype=int64)
tf.Tensor([0 7 3 4 9], shape=(5,), dtype=int64)
tf.Tensor([5 3 6 1 6], shape=(5,), dtype=int64)
tf.Tensor([4 8 7 7 0], shape=(5,), dtype=int64)
tf.Tensor([1 5 9 8 2], shape=(5,), dtype=int64)


In [23]:
# Same pipeline, but with reshuffle_each_iteration=False so that every pass
# over dataset7 reuses the same shuffled order.
dataset7 = (
    tf.data.Dataset.range(10)
    .repeat(3)
    .shuffle(buffer_size=8, seed=42, reshuffle_each_iteration=False)
    .batch(5)
)
for item in dataset7:
  print(item)

tf.Tensor([0 5 7 8 9], shape=(5,), dtype=int64)
tf.Tensor([1 0 6 4 5], shape=(5,), dtype=int64)
tf.Tensor([4 6 3 7 9], shape=(5,), dtype=int64)
tf.Tensor([8 2 2 2 1], shape=(5,), dtype=int64)
tf.Tensor([3 8 6 5 1], shape=(5,), dtype=int64)
tf.Tensor([4 9 7 3 0], shape=(5,), dtype=int64)


### Preprocessing the Data

concatenate(), zip(), window(), reduce(), cache(), shard(), flat_map(), padded_batch(), from_generator() and from_tensors()

In [25]:
# Three small 1-D tensors, collected in a plain Python list for the
# tf.stack() examples in the following cells.
x = tf.constant([1, 4])
y = tf.constant([2, 5])
z = tf.constant([3, 6])
ls =[x,y,z]
ls
# For comparison, tf.stack([x, y, z]) would combine all three into one tensor.

[<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 4], dtype=int32)>,
 <tf.Tensor: shape=(2,), dtype=int32, numpy=array([2, 5], dtype=int32)>,
 <tf.Tensor: shape=(2,), dtype=int32, numpy=array([3, 6], dtype=int32)>]

In [27]:
# Stack every tensor in ls except the last one along a new leading axis.
tf.stack(ls[:-1])

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[1, 4],
       [2, 5]], dtype=int32)>

In [28]:
# Stack every tensor in ls except the first one along a new leading axis.
tf.stack(ls[1:])

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[2, 5],
       [3, 6]], dtype=int32)>