Understanding the Tensorflow Pipeline Implementation

In [50]:
import tensorflow as tf
import os

In [3]:
# tf dataset creation
sample_sales_data = [30.0, 45.0, 23.0, 67.0, 34.0, -89.0, 12.0, 49.0, 38.0, 90.0, 56.0, 78.0, -12.0, 34.0, 23.0]
tf_dataset = tf.data.Dataset.from_tensor_slices(sample_sales_data)
tf_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.float32, name=None)>

In [4]:
# Iteration using Numpy Iterator
for data in tf_dataset.as_numpy_iterator():
    print(data)

30.0
45.0
23.0
67.0
34.0
-89.0
12.0
49.0
38.0
90.0
56.0
78.0
-12.0
34.0
23.0


In [5]:
# Iterating through first x elements
for data in tf_dataset.take(5):
    print(data.numpy())

30.0
45.0
23.0
67.0
34.0


In [6]:
def print_dataset(ds):
    for data in ds.as_numpy_iterator():
        print(data)

In [7]:
# Filter function
tf_dataset = tf_dataset.filter(lambda x: x > 0)
print_dataset(tf_dataset)

30.0
45.0
23.0
67.0
34.0
12.0
49.0
38.0
90.0
56.0
78.0
34.0
23.0


In [8]:
# Shuffle Function
tf_dataset = tf_dataset.shuffle(buffer_size=5)
print_dataset(tf_dataset)

23.0
34.0
67.0
30.0
45.0
90.0
56.0
34.0
49.0
12.0
78.0
38.0
23.0


In [9]:
# Batching
batch_tf_dataset = tf_dataset.batch(4)
print_dataset(batch_tf_dataset)

[45. 12. 30. 49.]


[90. 67. 34. 38.]
[78. 34. 23. 23.]
[56.]


In [10]:
# Performing all operations together
final_tf_dataset = tf_dataset.filter(lambda x: x > 0).shuffle(buffer_size=5).batch(4)
print_dataset(final_tf_dataset)

[67. 45. 23. 12.]
[56. 34. 49. 90.]
[23. 30. 34. 38.]
[78.]


Using Tensorflow dataset oprations to create training data of images

In [19]:
img_dir = '../CNN/datasets/flower_photos'
images_ds = tf.data.Dataset.list_files(img_dir + '/*/*', shuffle=False)
print(len(images_ds))
print(type(images_ds))

3670
<class 'tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset'>


In [49]:
images_ds = images_ds.shuffle(buffer_size=100)
for file in images_ds.take(10):
    print(file.numpy())

b'..\\CNN\\datasets\\flower_photos\\daisy\\4923279674_e7f8e70794_n.jpg'
b'..\\CNN\\datasets\\flower_photos\\daisy\\8619103877_d8c82c5f34_n.jpg'
b'..\\CNN\\datasets\\flower_photos\\daisy\\9595857626_979c45e5bf_n.jpg'
b'..\\CNN\\datasets\\flower_photos\\dandelion\\3005677730_2662753d3f_m.jpg'
b'..\\CNN\\datasets\\flower_photos\\daisy\\4333085242_bbeb3e2841_m.jpg'
b'..\\CNN\\datasets\\flower_photos\\dandelion\\13900486390_5a25785645_n.jpg'
b'..\\CNN\\datasets\\flower_photos\\dandelion\\10919961_0af657c4e8.jpg'
b'..\\CNN\\datasets\\flower_photos\\daisy\\13583238844_573df2de8e_m.jpg'
b'..\\CNN\\datasets\\flower_photos\\dandelion\\13887031789_97437f246b.jpg'
b'..\\CNN\\datasets\\flower_photos\\daisy\\8120563761_ed5620664f_m.jpg'


In [58]:
class_set = set()
for img_path in images_ds.as_numpy_iterator():
    class_name = os.path.basename(os.path.dirname(img_path))
    class_set.add(class_name)
    
class_name = [x.decode('utf-8') for x in class_set]
class_name

['sunflowers', 'tulips', 'roses', 'daisy', 'dandelion']

In [83]:
train_size = int(0.8 * len(images_ds))
train_ds = images_ds.take(train_size)
test_ds = images_ds.skip(train_size)
print(len(train_ds))
print(len(test_ds))

2936
734


In [84]:
def create_img_labesl(file_path):
    label = tf.strings.split(file_path, os.path.sep)[-2]
    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [180, 180])
    return img, label

def scale_image(image, label):
    image = image / 255.0
    return image, label

In [107]:
# testing the functions
img, label = create_img_labesl(images_ds.as_numpy_iterator().next())
img = scale_image(img, label)
print(label.numpy())
img

b'daisy'


(<tf.Tensor: shape=(180, 180, 3), dtype=float32, numpy=
 array([[[0.99866563, 0.99866563, 0.99866563],
         [0.9877451 , 0.9877451 , 0.9877451 ],
         [0.98379624, 0.98379624, 0.98379624],
         ...,
         [0.9814818 , 0.9814818 , 0.97363865],
         [0.9799834 , 0.9799834 , 0.9729573 ],
         [0.9979031 , 0.9979031 , 0.9979031 ]],
 
        [[0.9976307 , 0.9976307 , 0.9976307 ],
         [0.9946895 , 0.9946895 , 0.9946895 ],
         [0.9857026 , 0.9857026 , 0.9857026 ],
         ...,
         [0.98777235, 0.98777235, 0.98287034],
         [0.9870098 , 0.9870098 , 0.9861928 ],
         [0.9961874 , 0.9961874 , 0.99439   ]],
 
        [[0.9966775 , 0.9966775 , 0.9966775 ],
         [0.9987745 , 0.9987745 , 0.9987745 ],
         [0.97644335, 0.97644335, 0.97644335],
         ...,
         [0.98082805, 0.98082805, 0.9854576 ],
         [0.9870098 , 0.9870098 , 0.9870098 ],
         [0.99251086, 0.99251086, 0.9858115 ]],
 
        ...,
 
        [[0.9950708 , 0.9950708 

In [108]:
train_ds = train_ds.map(create_img_labesl)
train_ds = train_ds.map(scale_image)
test_ds = test_ds.map(create_img_labesl)
test_ds = test_ds.map(scale_image)

In [114]:
for img, label in train_ds.take(5):
    print("Image shape: ", img.numpy()[0][0]) # print first pixel rgb scaled values
    print("Label: ", label.numpy())

Image shape:  [0.04313726 0.04313726 0.05098039]
Label:  b'daisy'
Image shape:  [0.37625092 0.4154666  0.07429012]
Label:  b'daisy'
Image shape:  [0.01285403 0.04509804 0.00016945]
Label:  b'daisy'
Image shape:  [0.38588235 0.26154685 0.01215686]
Label:  b'daisy'
Image shape:  [0.02745098 0.03921569 0.00392157]
Label:  b'daisy'
