# tf.data.Dataset Examples

In [2]:
import tensorflow as tf
import pathlib
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Show at most 4 decimal places when NumPy formats floats, keeping printed
# values compact.
np.set_printoptions(precision=4)

## Iterating Dataset elements

In [5]:
# Build a dataset with one scalar element per list entry, then walk it
# eagerly and print each element's NumPy value.
sample_values = [8, 3, 0, 8, 2, 1]
dataset = tf.data.Dataset.from_tensor_slices(sample_values)
for elem in dataset:
    print(elem.numpy())

8
3
0
8
2
1


## Sum by reduce operation

In [6]:
# Fold the dataset into a single running sum, starting from 0.
total = dataset.reduce(
    initial_state=0,
    reduce_func=lambda running, item: running + item,
)
print(total.numpy())

22


## Dataset Construction

In [21]:
# Slice a [4, 10] random tensor along its first axis, so the dataset holds
# four elements of shape (10,).
random_rows = tf.random.uniform(shape=[4, 10])
dataset1 = tf.data.Dataset.from_tensor_slices(random_rows)

print(dataset1.element_spec)

def showDataset(ds):
    """Print every element of ``ds``, one element per line.

    Args:
        ds: Any iterable of elements. In this notebook it is a
            ``tf.data.Dataset`` iterated eagerly.
    """
    # Iterate the argument, not the notebook-global ``dataset1``, so the
    # helper works for whichever dataset the caller passes in.
    for x in ds:
        print(x)

# Print each element of dataset1 using the helper defined above.
showDataset(dataset1)        

TensorSpec(shape=(10,), dtype=tf.float32, name=None)
tf.Tensor([0.1236 0.156  0.6723 0.9751 0.9977 0.8021 0.4398 0.7278 0.0794 0.7184], shape=(10,), dtype=float32)
tf.Tensor([0.6871 0.2333 0.1294 0.0369 0.9083 0.5099 0.7593 0.9971 0.0115 0.1493], shape=(10,), dtype=float32)
tf.Tensor([0.8723 0.6505 0.7222 0.5397 0.4376 0.7862 0.5419 0.9505 0.0818 0.7785], shape=(10,), dtype=float32)
tf.Tensor([0.2968 0.787  0.883  0.9962 0.2794 0.9622 0.8805 0.9925 0.3021 0.9493], shape=(10,), dtype=float32)


In [18]:
# Each element is a pair: a scalar float and a length-100 int32 vector
# (values drawn with maxval=100). Both tensors share a leading dimension of
# 4, so slicing yields four aligned pairs.
scalar_component = tf.random.uniform(shape=[4])
vector_component = tf.random.uniform(shape=[4, 100], maxval=100, dtype=tf.int32)
dataset2 = tf.data.Dataset.from_tensor_slices((scalar_component, vector_component))

dataset2.element_spec

(TensorSpec(shape=(), dtype=tf.float32, name=None),
 TensorSpec(shape=(100,), dtype=tf.int32, name=None))

## Zipping up multiple datasets

In [19]:
# Pair up elements of the two datasets position by position; the resulting
# element structure nests dataset2's tuple inside the outer pair.
component_datasets = (dataset1, dataset2)
dataset3 = tf.data.Dataset.zip(component_datasets)

dataset3.element_spec

(TensorSpec(shape=(10,), dtype=tf.float32, name=None),
 (TensorSpec(shape=(), dtype=tf.float32, name=None),
  TensorSpec(shape=(100,), dtype=tf.int32, name=None)))

## Datasets from random data with bounds

In [22]:
# Rebind dataset1 to four rows of ten random int32 values drawn with
# minval=1 and maxval=10.
bounded_ints = tf.random.uniform(shape=[4, 10], minval=1, maxval=10, dtype=tf.int32)
dataset1 = tf.data.Dataset.from_tensor_slices(bounded_ints)

showDataset(dataset1)

tf.Tensor([4 6 6 3 5 7 7 3 7 8], shape=(10,), dtype=int32)
tf.Tensor([9 2 9 6 5 5 9 5 6 1], shape=(10,), dtype=int32)
tf.Tensor([1 1 8 4 8 6 2 1 8 7], shape=(10,), dtype=int32)
tf.Tensor([6 7 5 7 2 3 3 3 1 5], shape=(10,), dtype=int32)


## CSV Data

In [24]:
# Fetch the Titanic training CSV via Keras' download helper, then load it
# into a DataFrame and preview the first rows.
TITANIC_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
titanic_file = tf.keras.utils.get_file(fname="train.csv", origin=TITANIC_URL)
df = pd.read_csv(titanic_file, index_col=None)
df.head()

Unnamed: 0,survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,0,male,22.0,1,0,7.25,Third,unknown,Southampton,n
1,1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
2,1,female,26.0,0,0,7.925,Third,unknown,Southampton,y
3,1,female,35.0,1,0,53.1,First,C,Southampton,n
4,0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y


In [25]:
# Slice the DataFrame column-wise: each dataset element is a dict mapping a
# column name to that row's value.
column_map = dict(df)
titanic_slices = tf.data.Dataset.from_tensor_slices(column_map)

# Show only the first element, one feature per line, with the quoted column
# name padded to 20 characters.
ROW_TEMPLATE = "  {!r:20s}: {}"
for feature_batch in titanic_slices.take(1):
    for key, value in feature_batch.items():
        print(ROW_TEMPLATE.format(key, value))

  'survived'          : 0
  'sex'               : b'male'
  'age'               : 22.0
  'n_siblings_spouses': 1
  'parch'             : 0
  'fare'              : 7.25
  'class'             : b'Third'
  'deck'              : b'unknown'
  'embark_town'       : b'Southampton'
  'alone'             : b'n'
