# Tensorflow Input Pipeline

Tensorflow tf.Data api allows you to build a data input pipeline. Using this you can handle large dataset for your deep learning training by streaming training samples from hard disk or S3 storage. tf.data.Dataset is the main class in tf.data api. 

In [1]:
import tensorflow as tf

In [2]:
daily_sales_num = [21, 22, -108, 31, -1, 32, 34, 31]

In [3]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_num)
tf_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [4]:
for sales in tf_dataset:
    print(sales)

tf.Tensor(21, shape=(), dtype=int32)
tf.Tensor(22, shape=(), dtype=int32)
tf.Tensor(-108, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)
tf.Tensor(-1, shape=(), dtype=int32)
tf.Tensor(32, shape=(), dtype=int32)
tf.Tensor(34, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)


In [6]:
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
-108
31
-1
32
34
31


In [7]:
# To print first 3 elements
for sales in tf_dataset.take(3):
    print(sales.numpy())

21
22
-108


In [8]:
# To filter out data so that the element can't be negative
tf_dataset = tf_dataset.filter(lambda x: x>0)

for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
31
32
34
31


In [9]:
# Scaling i.e., converting US doller into Indian rupees
tf_dataset = tf_dataset.map(lambda x: x*84)

for sales in tf_dataset.as_numpy_iterator():
    print(sales)

1764
1848
2604
2688
2856
2604


In [10]:
# To rearrange the elements randomly
tf_dataset = tf_dataset.shuffle(3)

for sales in tf_dataset.as_numpy_iterator():
    print(sales)

1764
1848
2688
2604
2856
2604


In [12]:
# Batching 
for sales_batch in tf_dataset.batch(4):
    print(sales_batch.numpy())

[2604 2688 2856 1848]
[1764 2604]


In [14]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_num)

tf_dataset = tf_dataset.filter(lambda x: x>0).map(lambda y: y*84).shuffle(2).batch(3)

for sales in tf_dataset.as_numpy_iterator():
    print(sales)

[1764 2604 2688]
[1848 2856 2604]


## Loading Image data

In [15]:
image_ds = tf.data.Dataset.list_files('images/*/*', shuffle=False)

for file in image_ds.take(4):
    print(file.numpy())

b'images\\cat\\20 Reasons Why Cats Make the Best Pets....jpg'
b'images\\cat\\7 Foods Your Cat Can_t Eat.jpg'
b'images\\cat\\A cat appears to have caught the....jpg'
b'images\\cat\\Adopt-A-Cat Month\xc2\xae - American Humane....jpg'


In [20]:
image_count = len(image_ds)
image_count

130

In [21]:
type(image_ds)

tensorflow.python.data.ops.shuffle_op._ShuffleDataset

In [22]:
image_ds = image_ds.shuffle(200)

for file in image_ds.take(4):
    print(file.numpy())

b'images\\dog\\The Black Dog Tavern Company _ Life off....jpg'
b'images\\cat\\Want your cat to stay in purrrfect....jpg'
b'images\\dog\\best dog treats_ according to veterinarians.jpg'
b'images\\cat\\Orlando Cat Caf\xc3\xa9.png'


In [23]:
class_names = ['cat', 'dog']

In [24]:
train_size = int(image_count*0.8)

train_ds = image_ds.take(train_size)
test_ds = image_ds.skip(train_size)

In [25]:
len(train_ds)

104

In [26]:
len(test_ds)

26

In [27]:
s = 'images\\dog\\The Black Dog Tavern Company _ Life off....jpg'

s.split("\\")

['images', 'dog', 'The Black Dog Tavern Company _ Life off....jpg']

In [29]:
# To get label
s.split("\\")[-2]

'dog'

In [32]:
def get_label(file_path):
    import os
    parts = tf.strings.split(file_path, os.path.sep)
    return parts[-2]

In [33]:
for t in train_ds.take(4):
    print(t.numpy())

b'images\\dog\\Dogs caught coronavirus from their....jpg'
b'images\\cat\\What to do if your cat is marking....jpg'
b'images\\dog\\Most Expensive Dog Breeds For Pet....png'
b'images\\dog\\Best Dog & Puppy Health Insurance Plans....jpg'


In [36]:
for label in train_ds.map(get_label).take(3):
    print(label)

tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)


In [37]:
def process_image(file_path):
    label = get_label(file_path)
    
    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img)
    img = tf.image.resize(img, [128, 128])
    
    return img, label

In [40]:
train_ds = train_ds.map(process_image)
for img, label in train_ds.take(3):
    print("Image: ", img)
    print("Label: ", label)

Image:  tf.Tensor(
[[[254.      254.      254.     ]
  [254.      254.      254.     ]
  [254.      254.      254.     ]
  ...
  [254.      254.      254.     ]
  [254.      254.      254.     ]
  [254.      254.      254.     ]]

 [[254.      254.      254.     ]
  [254.      254.      254.     ]
  [254.      254.      254.     ]
  ...
  [254.      254.      254.     ]
  [254.      254.      254.     ]
  [254.      254.      254.     ]]

 [[254.      254.      254.     ]
  [254.      254.      254.     ]
  [254.      254.      254.     ]
  ...
  [254.      254.      254.     ]
  [254.      254.      254.     ]
  [254.      254.      254.     ]]

 ...

 [[254.      254.      254.     ]
  [254.      254.      254.     ]
  [254.      254.      254.     ]
  ...
  [252.      252.      252.     ]
  [252.92188 252.92188 252.92188]
  [252.74902 252.74902 252.74902]]

 [[254.      254.      254.     ]
  [254.      254.      254.     ]
  [254.      254.      254.     ]
  ...
  [254.      254.  

In [41]:
def scale(image, label):
    return image/255, label

In [43]:
train_ds = train_ds.map(scale)

for img, label in train_ds.take(5):
    print("***Image: ", img.numpy()[0][0])
    print("***Label: ", label.numpy())

***Image:  [0.8784314 0.8156863 0.7647059]
***Label:  b'dog'
***Image:  [0.7734375  0.74206495 0.7342218 ]
***Label:  b'cat'
***Image:  [0.3125766  0.34394914 0.35571384]
***Label:  b'dog'
***Image:  [0.6161133  0.74938536 0.84681183]
***Label:  b'dog'
***Image:  [0.09411765 0.07450981 0.05882353]
***Label:  b'dog'
