<a href="https://colab.research.google.com/github/ravitata/tensorflow2/blob/master/tf-data-learn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
# Show the installed TensorFlow version so the recorded outputs can be matched to it.
print(tf.__version__)

2.3.0


In [None]:
# Slicing a 1-D list along its first axis produces scalar elements (shape ()).
values_1d = [1, 2, 3, 4, 5]
dataset = tf.data.Dataset.from_tensor_slices(values_1d)
print(dataset)

<TensorSliceDataset shapes: (), types: tf.int32>


In [None]:
# Slicing a 3x2 nested list along its first axis produces three 1-D elements of shape (2,).
rows_3x2 = [[1, 2], [3, 4], [5, 6]]
dataset = tf.data.Dataset.from_tensor_slices(rows_3x2)

print(dataset)
print()
# Materialise every element as a NumPy array in one go.
print(list(dataset.as_numpy_iterator()))
print()
# Iterating eagerly yields tf.Tensor objects; .numpy() exposes the underlying array.
for row in dataset:
  print(row, row.numpy(), sep="\n")

<TensorSliceDataset shapes: (2,), types: tf.int32>

[array([1, 2], dtype=int32), array([3, 4], dtype=int32), array([5, 6], dtype=int32)]

tf.Tensor([1 2], shape=(2,), dtype=int32)
[1 2]
tf.Tensor([3 4], shape=(2,), dtype=int32)
[3 4]
tf.Tensor([5 6], shape=(2,), dtype=int32)
[5 6]


In [None]:
# A random 3x2 float tensor sliced along axis 0: three elements, each a 1-D tensor of shape (2,).
random_matrix = tf.random.normal([3, 2])
dataset = tf.data.Dataset.from_tensor_slices(random_matrix)

# Print the dataset, its per-element spec and its element count, separated by blank lines.
print(
    dataset,
    dataset.element_spec,
    dataset.cardinality(),  # number of elements in the dataset
    sep="\n\n",
)

<TensorSliceDataset shapes: (2,), types: tf.float32>

TensorSpec(shape=(2,), dtype=tf.float32, name=None)

tf.Tensor(3, shape=(), dtype=int64)


In [None]:
# A tuple of equally long sequences is sliced in lockstep, so each element is a
# (feature vector, label) pair.
pair_features = [[1, 2], [3, 4], [5, 6]]
pair_labels = ['A', 'B', 'C']
dataset = tf.data.Dataset.from_tensor_slices((pair_features, pair_labels))
print(dataset.element_spec, end="\n\n")

# Walk the full dataset ...
for pair in dataset:
  print(pair)
print()

# ... and then only its first two elements.
for pair in dataset.take(2):
  print(pair)

(TensorSpec(shape=(2,), dtype=tf.int32, name=None), TensorSpec(shape=(), dtype=tf.string, name=None))

(<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 2], dtype=int32)>, <tf.Tensor: shape=(), dtype=string, numpy=b'A'>)
(<tf.Tensor: shape=(2,), dtype=int32, numpy=array([3, 4], dtype=int32)>, <tf.Tensor: shape=(), dtype=string, numpy=b'B'>)
(<tf.Tensor: shape=(2,), dtype=int32, numpy=array([5, 6], dtype=int32)>, <tf.Tensor: shape=(), dtype=string, numpy=b'C'>)

(<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 2], dtype=int32)>, <tf.Tensor: shape=(), dtype=string, numpy=b'A'>)
(<tf.Tensor: shape=(2,), dtype=int32, numpy=array([3, 4], dtype=int32)>, <tf.Tensor: shape=(), dtype=string, numpy=b'B'>)


In [None]:
from tensorflow.keras.datasets import cifar10

# Load CIFAR-10 as NumPy arrays, already split into train and test sets.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# One-hot encode both label arrays over the 10 classes.
num_classes = 10
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes=num_classes)
    for labels in (y_train, y_test)
)

# Each dataset element is one (image, one-hot label) pair.
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
print(dataset.element_spec, '', dataset.cardinality(), sep='\n')


(TensorSpec(shape=(32, 32, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(10,), dtype=tf.float32, name=None))

tf.Tensor(50000, shape=(), dtype=int64)


In [None]:
# Group consecutive elements into batches of 64. The result is assigned back to
# `dataset`, so re-running this cell would batch the already-batched dataset again.
dataset = dataset.batch(64)

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Functional-API CNN: two 3x3 convolutions (8 then 18 filters), flattened into a
# 10-way softmax classifier.
inputs = Input(shape=(32, 32, 3))
h = inputs
for filters in (8, 18):
  h = Conv2D(filters, kernel_size=3, activation='relu')(h)
h = Flatten()(h)
outputs = Dense(10, activation='softmax')(h)
model = Model(inputs=inputs, outputs=outputs)

# Categorical cross-entropy is the loss that pairs with one-hot encoded labels.
rmsprop = RMSprop(learning_rate=1e-3)
model.compile(
    optimizer=rmsprop,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer output shapes and parameter counts of the compiled model.
model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 30, 30, 8)         224       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 18)        1314      
_________________________________________________________________
flatten (Flatten)            (None, 14112)             0         
_________________________________________________________________
dense (Dense)                (None, 10)                141130    
Total params: 142,668
Trainable params: 142,668
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Shapes of the training images and of their one-hot labels, one per line.
print(x_train.shape, y_train.shape, sep='\n')

(50000, 32, 32, 3)
(50000, 10)


In [None]:
# Number of *full* batches in one pass over the training set; floor division
# leaves any trailing partial batch out of the count.
batch_size = 64
num_train_samples = x_train.shape[0]
train_steps_per_epoch = num_train_samples // batch_size
print(train_steps_per_epoch)

781


In [None]:
# Inspect the element spec of the batched dataset that is about to be passed to model.fit.
print(dataset.element_spec)

(TensorSpec(shape=(None, 32, 32, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(None, 10), dtype=tf.float32, name=None))


In [None]:
# Train for one epoch on the batched dataset, stopping after
# `train_steps_per_epoch` batches.
model.fit(
    dataset,
    epochs=1,
    steps_per_epoch=train_steps_per_epoch,
)



<tensorflow.python.keras.callbacks.History at 0x7f247aacada0>

In [None]:
# Augmentation pipeline: random rotations (up to 20 degrees) and horizontal shifts
# (up to 20% of the width).
img_datagen = ImageDataGenerator(rotation_range=20, width_shift_range=0.2)
# NOTE: per the Keras docs, fit() only computes statistics for featurewise
# centering/normalisation or ZCA whitening; none is enabled here, so the call
# is kept only for parity with the original cell.
img_datagen.fit(x_train)

# Wrap the (endless) Keras generator in a tf.data pipeline.
# - The batch dimension is left as None: 50000 is not a multiple of batch_size, so
#   flow() emits a smaller final batch on every pass, and a fixed leading dimension
#   would make the dataset raise as soon as that batch is reached (e.g. with more
#   than one epoch).
# - Labels are declared float32, the dtype to_categorical produced, instead of
#   being cast to int32.
dataset_1 = tf.data.Dataset.from_generator(
    img_datagen.flow,
    args=[x_train, y_train, batch_size],
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 32, 32, 3], [None, 10]),
)
                                    

In [None]:
# The generator-backed dataset never ends, so steps_per_epoch is what bounds the epoch.
model.fit(
    dataset_1,
    epochs=1,
    steps_per_epoch=train_steps_per_epoch,
)



<tensorflow.python.keras.callbacks.History at 0x7f247aaca9e8>

## Using map and filter functions

In [28]:
from tensorflow.keras.datasets import cifar10

# Start again from the raw CIFAR-10 arrays for the map/filter experiments.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# One-hot encode the labels of both splits over the 10 classes.
num_classes = 10
y_train = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=num_classes)

# Unbatched dataset: one (image, one-hot label) pair per element.
train_pairs = (x_train, y_train)
dataset = tf.data.Dataset.from_tensor_slices(train_pairs)

print(dataset.element_spec, end='\n\n')
print(dataset.cardinality())

(TensorSpec(shape=(32, 32, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(10,), dtype=tf.float32, name=None))

tf.Tensor(50000, shape=(), dtype=int64)


In [29]:
# Batch in groups of 16; the element spec printed below reports the batch
# dimension as None.
dataset = dataset.batch(batch_size=16)
print(dataset.element_spec)

(TensorSpec(shape=(None, 32, 32, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(None, 10), dtype=tf.float32, name=None))


In [30]:
def rescale(image, label):
  """Divide the image values by 255 and pass the label through unchanged.

  Returns:
    A `(scaled_image, label)` tuple, suitable for use with `Dataset.map`.
  """
  scaled_image = image / 255
  return scaled_image, label

def label_filter(image, label):
  """Predicate for `Dataset.filter`: True only when `label` is the one-hot vector of class 9.

  `Dataset.filter` keeps the elements for which the predicate is True, so this
  selects class 9 rather than dropping it. `tf.reduce_all` collapses every axis,
  so on a batched label tensor the result is True only if every row is class 9.
  `image` is unused.
  """
  class_nine_one_hot = tf.constant([0] * 9 + [1], dtype=tf.float32)
  matches = tf.equal(label, class_nine_one_hot)
  return tf.reduce_all(matches)


In [31]:
# Apply `rescale` to every (already batched) element of the dataset.
dataset = dataset.map(rescale)

# Further transformations that could be chained here (left disabled):
#   dataset.filter(label_filter)
#   dataset.shuffle(256)
#   dataset.batch(64, drop_remainder=True)
#   dataset.repeat(5)

print(dataset.element_spec, '', dataset.cardinality(), sep='\n')

(TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 10), dtype=tf.float32, name=None))

tf.Tensor(3125, shape=(), dtype=int64)


In [32]:
# Count the elements by exhausting the dataset once.
rec = sum(1 for _ in dataset)

# The dataset was batched by 16 above, so this counts batches: 3125 * 16 = 50000 samples.
print('Number of rows = ', rec)

Number of rows =  3125


In [33]:
# Sentinel values that cardinality() returns for infinite and for unknown-length datasets.
print(
    tf.data.experimental.INFINITE_CARDINALITY,
    tf.data.experimental.UNKNOWN_CARDINALITY,
    sep='\n',
)

-1
-2


In [34]:
import numpy as np
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [35]:
# Fresh copy of the small CNN for the rescaled dataset: two 3x3 convolutions
# followed by a 10-way softmax layer.
inputs = Input(shape=(32, 32, 3))
h = Conv2D(filters=8, kernel_size=3, activation='relu')(inputs)
h = Conv2D(filters=18, kernel_size=3, activation='relu')(h)
h = Flatten()(h)
outputs = Dense(units=10, activation='softmax')(h)
model = Model(inputs=inputs, outputs=outputs)

# RMSprop with categorical cross-entropy, which expects one-hot labels.
rmsprop = RMSprop(learning_rate=1e-3)
model.compile(
    loss='categorical_crossentropy',
    optimizer=rmsprop,
    metrics=['accuracy'],
)
model.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 30, 30, 8)         224       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 28, 28, 18)        1314      
_________________________________________________________________
flatten_1 (Flatten)          (None, 14112)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                141130    
Total params: 142,668
Trainable params: 142,668
Non-trainable params: 0
_________________________________________________________________


In [36]:
# Dataset transformations return a NEW dataset and leave the original untouched,
# so the result of repeat() must be captured; a bare `dataset.repeat(5)` is a no-op.
# Five back-to-back passes over the data process the same number of batches as
# five epochs, although Keras reports them as one long epoch.
# A separate name keeps `dataset` single-pass for the cells that follow.
dataset_5_passes = dataset.repeat(5)
history = model.fit(dataset_5_passes)



In [37]:
# To train with an explicit `epochs` argument we first need the number of
# batches in one pass over the dataset.
cnt = sum(1 for _ in dataset)

# The dataset was batched by 16 above, so this counts batches: 3125 * 16 = 50000 samples.
print('Number of rows = ', cnt)

Number of rows =  3125


In [39]:
# repeat() with no count makes the dataset cycle forever, so Keras needs
# steps_per_epoch to know where each epoch ends.
dataset = dataset.repeat()
# steps_per_epoch is measured in batches, not samples. `cnt` is the number of
# batches in one pass, so using it directly makes each epoch exactly one pass.
steps_per_epoch = cnt
history = model.fit(dataset, steps_per_epoch=steps_per_epoch, epochs=5)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
