<a href="https://colab.research.google.com/github/priyanshu7466/Python/blob/main/Deep_Learning/tf_data_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy

In [2]:
daily_sales_numbers = [21, 22, -108, 31, -1, 32, 34,31]
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
tf_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [3]:
for sales in tf_dataset:
  print(sales.numpy())

21
22
-108
31
-1
32
34
31


In [4]:
for sales in tf_dataset.as_numpy_iterator():
  print(sales)

21
22
-108
31
-1
32
34
31


In [5]:
for sales in tf_dataset.take(3):
  print(sales.numpy())

21
22
-108


In [6]:
tf_dataset = tf_dataset.filter(lambda x:x>0)
for sales in tf_dataset:
  print(sales.numpy())

21
22
31
32
34
31


In [7]:
tf_dataset = tf_dataset.map(lambda x:x*83)
for sales in tf_dataset:
  print(sales.numpy())

1743
1826
2573
2656
2822
2573


In [9]:
tf_dataset = tf_dataset.shuffle(3)
for sales in tf_dataset:
  print(sales.numpy())

1826
2656
2822
2573
1743
2573


In [10]:
for sales in tf_dataset.batch(2):
  print(sales.numpy())

[1743 2656]
[2573 2822]
[1826 2573]


In [11]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)

tf_dataset = tf_dataset.filter(lambda x:x>0).map(lambda y:y*83).shuffle(3).batch(2)
for sales in tf_dataset.as_numpy_iterator():
  print(sales)

[1743 1826]
[2822 2573]
[2656 2573]


In [12]:
image_ds = tf.data.Dataset.list_files('/content/drive/MyDrive/images/*/*', shuffle=False)

In [13]:
image_count = len(image_ds)
image_count

196

In [14]:
type(image_ds)

tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset

In [15]:
for file in image_ds.take(3):
  print(file.numpy())

b'/content/drive/MyDrive/images/cat/00000153-bebc-d742-a553-bffd3c270000_square.jpg'
b'/content/drive/MyDrive/images/cat/01h84ks8b3kn95ybj78q.jpg'
b'/content/drive/MyDrive/images/cat/07CAT-STRIPES-mediumSquareAt3X-v2.jpg'


In [16]:
image_ds = image_ds.shuffle(200)
for file in image_ds.take(3):
  print(file.numpy())

b'/content/drive/MyDrive/images/dog/1655430860853.jpg'
b'/content/drive/MyDrive/images/dog/maltese-portrait.jpg'
b'/content/drive/MyDrive/images/cat/african-wild-cat.jpg'


In [17]:
class_names = ["cat","dog"]

In [19]:
train_size = int(image_count*0.8)
train_ds = image_ds.take(train_size)
test_ds = image_ds.skip(train_size)

In [20]:
len(train_ds)

156

In [21]:
len(test_ds)

40

In [22]:
def get_label(file_path):
  import os
  parts = tf.strings.split(file_path, os.path.sep)
  return parts[-2]

In [23]:
get_label('/content/drive/MyDrive/images/dog/1655430860853.jpg')

<tf.Tensor: shape=(), dtype=string, numpy=b'dog'>

In [24]:
def process_image(file_path):
  label = get_label(file_path)
  img = tf.io.read_file(file_path)
  img = tf.image.decode_jpeg(img)
  img = tf.image.resize(img, [128,128])
  return img,label

In [25]:
img, label = process_image('/content/drive/MyDrive/images/dog/1655430860853.jpg')
img.numpy()[:2]

array([[[146.83203 , 129.83203 , 103.83203 ],
        [146.83203 , 129.83203 , 103.83203 ],
        [146.83203 , 129.83203 , 103.83203 ],
        [148.83203 , 129.83203 ,  99.83203 ],
        [151.41602 , 132.41602 , 102.416016],
        [151.83203 , 129.83203 , 105.83203 ],
        [150.83203 , 131.41602 , 102.66406 ],
        [150.41602 , 131.41602 , 101.416016],
        [149.      , 130.      ,  98.      ],
        [148.      , 129.      ,  97.      ],
        [149.      , 129.      ,  96.      ],
        [149.      , 129.      ,  96.      ],
        [149.      , 129.      ,  96.      ],
        [148.      , 128.      ,  93.      ],
        [148.      , 128.      ,  93.      ],
        [148.      , 128.      ,  93.      ],
        [150.      , 128.      ,  91.      ],
        [150.      , 128.      ,  91.      ],
        [150.      , 128.      ,  91.      ],
        [150.      , 126.      ,  90.      ],
        [150.      , 126.      ,  90.      ],
        [148.      , 124.      ,  

In [26]:
train_ds = train_ds.map(process_image)
test_ds = test_ds.map(process_image)

In [27]:
for image, label in train_ds.take(1):
  print("***",image)
  print("***",label)

*** tf.Tensor(
[[[ 52.015625  65.015625  55.015625]
  [ 50.441406  63.48828   53.347656]
  [ 49.75      62.75      53.75    ]
  ...
  [ 44.48047   41.73047   26.730469]
  [ 41.058594  38.308594  23.308594]
  [ 35.05078   33.039062  18.039062]]

 [[ 51.        67.        56.      ]
  [ 50.964844  66.96484   55.871094]
  [ 49.558594  65.55859   55.558594]
  ...
  [ 54.289062  48.289062  36.289062]
  [ 52.285156  46.285156  34.285156]
  [ 51.73828   45.73828   33.73828 ]]

 [[ 48.984375  70.984375  57.984375]
  [ 47.964844  70.01172   56.871094]
  [ 47.5       68.88281   57.191406]
  ...
  [ 60.828125  54.828125  42.828125]
  [ 57.85547   51.85547   39.85547 ]
  [ 56.003906  50.003906  38.003906]]

 ...

 [[113.96094  101.47266  110.234375]
  [135.4375   132.53125  139.48438 ]
  [125.86719  127.01953  128.21094 ]
  ...
  [ 84.328125  77.40625   93.25    ]
  [ 79.23828   73.23828   86.73828 ]
  [ 74.51953   71.484375  82.51953 ]]

 [[121.8125    94.8125    66.30469 ]
  [131.33203  104.3203

In [28]:
def scale(image,label):
  return image/255, label

In [29]:
train_ds = train_ds.map(scale)

In [30]:
for image, label in train_ds.take(5):
  print("***Image",image.numpy()[0][0])
  print("***Label",label.numpy())

***Image [0.14387254 0.14387254 0.14779411]
***Label b'dog'
***Image [0. 0. 0.]
***Label b'dog'
***Image [0.9019608  0.91764706 0.92941177]
***Label b'dog'
***Image [0.8485294  0.825      0.69166666]
***Label b'cat'
***Image [0.16470589 0.18039216 0.35686275]
***Label b'dog'


# Exercise

In [59]:
review_ds = tf.data.Dataset.list_files('/content/drive/MyDrive/reviews/*/*')

In [60]:
review_count = len(review_ds)
review_count

6

In [61]:
type(review_ds)

tensorflow.python.data.ops.shuffle_op._ShuffleDataset

In [62]:
for file in review_ds:
  print(file.numpy())

b'/content/drive/MyDrive/reviews/positive/pos_3.txt'
b'/content/drive/MyDrive/reviews/negative/neg_3.txt'
b'/content/drive/MyDrive/reviews/negative/neg_2.txt'
b'/content/drive/MyDrive/reviews/negative/neg_1.txt'
b'/content/drive/MyDrive/reviews/positive/pos_1.txt'
b'/content/drive/MyDrive/reviews/positive/pos_2.txt'


In [63]:
review_ds = review_ds.shuffle(2)
def get_label(file_path):
  import os
  parts = tf.strings.split(file_path, os.path.sep)
  return parts[-2]

In [64]:
get_label('/content/drive/MyDrive/reviews/negative/neg_1.txt')

<tf.Tensor: shape=(), dtype=string, numpy=b'negative'>

In [65]:
def process_reviews(file_path):
  label = get_label(file_path)
  rev = tf.io.read_file(file_path)
  return label, rev

In [66]:
label, rev = process_reviews('/content/drive/MyDrive/reviews/negative/neg_1.txt')
rev.numpy()

b"Basically there's a family where a little boy (Jake) thinks there's a zombie in his closet & his parents are fighting all the time.<br /><br />This movie is slower than a soap opera... and suddenly, Jake decides to become Rambo and kill the zombie.<br /><br />OK, first of all when you're going to make a film you must Decide if its a thriller or a drama! As a drama the movie is watchable. Parents are divorcing & arguing like in real life. And then we have Jake with his closet which totally ruins all the film! I expected to see a BOOGEYMAN similar movie, and instead i watched a drama with some meaningless thriller spots.<br /><br />3 out of 10 just for the well playing parents & descent dialogs. As for the shots with Jake: just ignore them.\n"

In [67]:
review_ds1 = review_ds.map(process_reviews)
for label, rev in review_ds1:
  print("***Label",label)
  print("***Review",rev)

***Label tf.Tensor(b'negative', shape=(), dtype=string)
***Review tf.Tensor(b"This show was an amazing, fresh & innovative idea in the 70's when it first aired. The first 7 or 8 years were brilliant, but things dropped off after that. By 1990, the show was not really funny anymore, and it's continued its decline further to the complete waste of time it is today.<br /><br />It's truly disgraceful how far this show has fallen. The writing is painfully bad, the performances are almost as bad - if not for the mildly entertaining respite of the guest-hosts, this show probably wouldn't still be on the air. I find it so hard to believe that the same creator that hand-selected the original cast also chose the band of hacks that followed. How can one recognize such brilliance and then see fit to replace it with such mediocrity? I felt I must give 2 stars out of respect for the original cast that made this show such a huge success. As it is now, the show is just awful. I can't believe it's still

In [69]:
review_ds2 = review_ds1.filter(lambda label, rev: rev!="")
for label, rev in review_ds2:
  print("***Label", label)
  print("***Review",rev)

***Label tf.Tensor(b'negative', shape=(), dtype=string)
***Review tf.Tensor(b"This show was an amazing, fresh & innovative idea in the 70's when it first aired. The first 7 or 8 years were brilliant, but things dropped off after that. By 1990, the show was not really funny anymore, and it's continued its decline further to the complete waste of time it is today.<br /><br />It's truly disgraceful how far this show has fallen. The writing is painfully bad, the performances are almost as bad - if not for the mildly entertaining respite of the guest-hosts, this show probably wouldn't still be on the air. I find it so hard to believe that the same creator that hand-selected the original cast also chose the band of hacks that followed. How can one recognize such brilliance and then see fit to replace it with such mediocrity? I felt I must give 2 stars out of respect for the original cast that made this show such a huge success. As it is now, the show is just awful. I can't believe it's still

In [70]:
final_ds = review_ds.map(process_reviews).filter(lambda label, rev: rev!="").shuffle(3)
for label, rev in final_ds:
  print("***Label",label)
  print("***Review",rev)

***Label tf.Tensor(b'negative', shape=(), dtype=string)
***Review tf.Tensor(b"Basically there's a family where a little boy (Jake) thinks there's a zombie in his closet & his parents are fighting all the time.<br /><br />This movie is slower than a soap opera... and suddenly, Jake decides to become Rambo and kill the zombie.<br /><br />OK, first of all when you're going to make a film you must Decide if its a thriller or a drama! As a drama the movie is watchable. Parents are divorcing & arguing like in real life. And then we have Jake with his closet which totally ruins all the film! I expected to see a BOOGEYMAN similar movie, and instead i watched a drama with some meaningless thriller spots.<br /><br />3 out of 10 just for the well playing parents & descent dialogs. As for the shots with Jake: just ignore them.\n", shape=(), dtype=string)
***Label tf.Tensor(b'negative', shape=(), dtype=string)
***Review tf.Tensor(b"This show was an amazing, fresh & innovative idea in the 70's when 