# tf.data API

In [1]:
import tensorflow as tf

In [3]:
# Example data: daily sales figures, including one negative entry (-108).
daily_sales_numbers = [
    21, 22, -108, 31,
    1, 32, 24, 31,
]

## Simple tf.data.Dataset object


In [13]:
# Slice the Python list into a dataset; the repr below shows scalar
# (shape ()) int32 elements.
tf_dataset = tf.data.Dataset.from_tensor_slices(tensors=daily_sales_numbers)
tf_dataset

<TensorSliceDataset shapes: (), types: tf.int32>

In [14]:
print("you can iterate through out your dataset")
for sales in tf_dataset:  # each element is a scalar tf.Tensor
    print(sales.numpy())  # .numpy() unwraps it to a plain number

print("you can use a numpy iterator")
for sales in tf_dataset.as_numpy_iterator():  # elements arrive already converted
    print(sales)

print("you can select only a number of elements")
for sales in tf_dataset.take(count=3):  # only the first three elements
    print(sales)  # printed as a tf.Tensor, not unwrapped
    


you can iterate through out your dataset
21
22
-108
31
1
32
24
31
you can use a numpy iterator
21
22
-108
31
1
32
24
31
you can select only a number of elements
tf.Tensor(21, shape=(), dtype=int32)
tf.Tensor(22, shape=(), dtype=int32)
tf.Tensor(-108, shape=(), dtype=int32)


## Filter values from your dataset

In [16]:
print("Filter negative values")
# Only negative values are removed. The predicate is x >= 0 rather than
# x > 0 so that a zero-sales day (which is non-negative) is kept, matching
# both the variable name and the message printed above.
tf_dataset_non_negative = tf_dataset.filter(lambda x: x >= 0)
for sales in tf_dataset_non_negative:
    print(sales)

Filter negative values
tf.Tensor(21, shape=(), dtype=int32)
tf.Tensor(22, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(32, shape=(), dtype=int32)
tf.Tensor(24, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)


## Map values from your dataset

In [20]:
def currency_mx(input_val, rate=20):
    """Convert a sales amount by multiplying it by a fixed exchange rate.

    Args:
        input_val: The amount to convert (a number or a tensor element).
        rate: Multiplier applied to ``input_val``. Defaults to 20, the value
            that was previously hard-coded. NOTE(review): presumably an
            approximate rate to Mexican pesos, judging by the function
            name - confirm.

    Returns:
        ``input_val * rate``.
    """
    return input_val * rate

# Apply the conversion to every element that survived the filter.
tf_dataset_mx = tf_dataset_non_negative.map(map_func=currency_mx)
for sales in tf_dataset_mx:
    print(sales)

tf.Tensor(420, shape=(), dtype=int32)
tf.Tensor(440, shape=(), dtype=int32)
tf.Tensor(620, shape=(), dtype=int32)
tf.Tensor(20, shape=(), dtype=int32)
tf.Tensor(640, shape=(), dtype=int32)
tf.Tensor(480, shape=(), dtype=int32)
tf.Tensor(620, shape=(), dtype=int32)


## Shuffle
How it works: https://stackoverflow.com/questions/53514495/what-does-batch-repeat-and-shuffle-do-with-tensorflow-dataset

In [21]:
# Shuffle using a buffer of 3 elements.
# NOTE(review): no seed is passed, so the printed order is expected to
# differ between runs.
tf_dataset_shuffled = tf_dataset_mx.shuffle(buffer_size=3)
for sales in tf_dataset_shuffled:
    print(sales)

tf.Tensor(420, shape=(), dtype=int32)
tf.Tensor(440, shape=(), dtype=int32)
tf.Tensor(20, shape=(), dtype=int32)
tf.Tensor(480, shape=(), dtype=int32)
tf.Tensor(640, shape=(), dtype=int32)
tf.Tensor(620, shape=(), dtype=int32)
tf.Tensor(620, shape=(), dtype=int32)


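## Batching
Batching groups consecutive elements into batches of a fixed size (the last batch may be smaller). Great for distributed environments.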

In [24]:
# Group consecutive elements into batches of 3; the final batch holds
# whatever is left over.
tf_dataset_batched = tf_dataset_mx.batch(batch_size=3)
for sales_batch in tf_dataset_batched:
    print(sales_batch.numpy())

[420 440 620]
[ 20 640 480]
[620]


## Putting everything together

In [25]:
tf_dataset_processed = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
tf_dataset_processed = tf_dataset_processed.filter(lambda x: x>0).map