In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
#the following dataset is a scalar

dataset = tf.data.Dataset.from_tensor_slices([8,3,0,8,2,1])
dataset


<TensorSliceDataset shapes: (), types: tf.int32>

In [3]:
for element in dataset:
    print(element.numpy())

8
3
0
8
2
1


In [4]:
#reduce the elemnts to produce a single digit
dataset.reduce(0, lambda state, value: state + value).numpy()

22

In [5]:
dataset1 = tf.data.Dataset.from_tensor_slices(tf.random.uniform([4, 10]))
dataset1.element_spec

TensorSpec(shape=(10,), dtype=tf.float32, name=None)

In [6]:
dataset2 = tf.data.Dataset.from_tensor_slices((tf.random.uniform([4]), tf.random.uniform([4, 100], maxval = 100, dtype = tf.int32)))

In [7]:
dataset2.element_spec

(TensorSpec(shape=(), dtype=tf.float32, name=None),
 TensorSpec(shape=(100,), dtype=tf.int32, name=None))

In [8]:
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))
dataset3.element_spec

(TensorSpec(shape=(10,), dtype=tf.float32, name=None),
 (TensorSpec(shape=(), dtype=tf.float32, name=None),
  TensorSpec(shape=(100,), dtype=tf.int32, name=None)))

In [9]:
dataset4 = tf.data.Dataset.from_tensors(tf.SparseTensor(indices = [[0, 0], [1, 2]], values = [1, 2], dense_shape = [3, 4]))
dataset4.element_spec

SparseTensorSpec(TensorShape([3, 4]), tf.int32)

In [10]:
#batch dataset

In [11]:
inc_dataset = tf.data.Dataset.range(100)
dec_dataset = tf.data.Dataset.range(0, -100, -1)
dataset = tf.data.Dataset.zip((inc_dataset, dec_dataset))
batched_dataset = dataset.batch(4)

it = iter(batched_dataset)
for batch in batched_dataset.take(5):
    print([arr.numpy() for arr in batch])

[array([0, 1, 2, 3], dtype=int64), array([ 0, -1, -2, -3], dtype=int64)]
[array([4, 5, 6, 7], dtype=int64), array([-4, -5, -6, -7], dtype=int64)]
[array([ 8,  9, 10, 11], dtype=int64), array([ -8,  -9, -10, -11], dtype=int64)]
[array([12, 13, 14, 15], dtype=int64), array([-12, -13, -14, -15], dtype=int64)]
[array([16, 17, 18, 19], dtype=int64), array([-16, -17, -18, -19], dtype=int64)]


In [12]:
batched_dataset

<BatchDataset shapes: ((None,), (None,)), types: (tf.int64, tf.int64)>

In [13]:
batched_dataset = dataset.batch(7, drop_remainder = True)

In [14]:
batched_dataset

<BatchDataset shapes: ((7,), (7,)), types: (tf.int64, tf.int64)>

In [15]:
#for the dataset of different dimentions

dataset = tf.data.Dataset.range(100)


dataset = dataset.map(lambda x: tf.fill([tf.cast(x, tf.int32)], x))

#padded batch pad the values if there are not in the dataset with 0s
dataset = dataset.padded_batch(4, padded_shapes = (None, ))

for batch in dataset.take(5):
    print(batch.numpy())
    print()

[[0 0 0]
 [1 0 0]
 [2 2 0]
 [3 3 3]]

[[4 4 4 4 0 0 0]
 [5 5 5 5 5 0 0]
 [6 6 6 6 6 6 0]
 [7 7 7 7 7 7 7]]

[[ 8  8  8  8  8  8  8  8  0  0  0]
 [ 9  9  9  9  9  9  9  9  9  0  0]
 [10 10 10 10 10 10 10 10 10 10  0]
 [11 11 11 11 11 11 11 11 11 11 11]]

[[12 12 12 12 12 12 12 12 12 12 12 12  0  0  0]
 [13 13 13 13 13 13 13 13 13 13 13 13 13  0  0]
 [14 14 14 14 14 14 14 14 14 14 14 14 14 14  0]
 [15 15 15 15 15 15 15 15 15 15 15 15 15 15 15]]

[[16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16  0  0  0]
 [17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17  0  0]
 [18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18  0]
 [19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19]]



In [16]:
#high level apis

train, test = tf.keras.datasets.fashion_mnist.load_data()
images, labels = train
images = images/255.0
labels = labels.astype(np.int32)

In [17]:
fmnist_train_ds = tf.data.Dataset.from_tensor_slices((images, labels))
fmnist_train_ds = fmnist_train_ds.shuffle(5000).batch(32)
model = tf.keras.Sequential([tf.keras.layers.Flatten(), tf.keras.layers.Dense(10, activation = 'softmax')])

model.compile(optimizer = 'adam', loss = tf.keras.losses.SparseCategoricalCrossentropy(), metrics = ['accuracy'])

In [18]:
model.fit(fmnist_train_ds, epochs = 2)

Train for 1875 steps
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x176b6a4d550>

In [19]:
#passing an infinite dataset by passing repeat with no arguments by telling ow much steps of datasets we need to train
model.fit(fmnist_train_ds.repeat(), epochs = 2, steps_per_epoch = 20)

Train for 20 steps
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x176b68b7d30>

In [20]:
loss, accuracy = model.evaluate(fmnist_train_ds)
print(loss, accuracy)

0.4532144069433212 0.84386665


In [24]:
#if the dataset is very big we need to pass the steps arguments

loss, accuracy = model.evaluate(fmnist_train_ds.repeat(), steps = 20)



In [25]:
test_ds = tf.data.Dataset.from_tensor_slices(images).batch(32)
results = model.predict(test_ds, steps = 20)
print(results)

[[3.2264085e-09 3.9397703e-09 1.1981076e-07 ... 3.5013054e-03
  7.6725380e-05 9.9420977e-01]
 [9.5420969e-01 7.9144706e-07 4.3898327e-03 ... 9.8071334e-13
  3.3311949e-06 3.6977407e-10]
 [2.2665569e-01 1.2841074e-01 7.4463524e-02 ... 5.7009218e-04
  2.9183154e-03 4.9994915e-04]
 ...
 [7.8410165e-05 5.1918214e-05 2.6010606e-01 ... 2.9603284e-10
  2.1594396e-04 4.8054010e-09]
 [7.5236679e-04 9.7521712e-05 7.7512777e-01 ... 1.3478642e-09
  1.3347052e-03 1.1039691e-08]
 [4.3377094e-02 1.0681308e-03 4.4301962e-03 ... 2.0003069e-06
  5.7492599e-02 1.8313496e-08]]


In [37]:
test_ds = tf.data.Dataset.from_tensor_slices(images).batch(32)
print(test_ds)
print(fmnist_train_ds)

<BatchDataset shapes: (None, 28, 28), types: tf.float64>
<BatchDataset shapes: ((None, 28, 28), (None,)), types: (tf.float64, tf.int32)>
