In [1]:
import tensorflow as tf

In [2]:
daily_sales_data = [21,22,-108,9,40,53,-1,31]

In [3]:
# convert the data into tensorflow dataset
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_data)
tf_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [4]:
for sales_nums in tf_dataset.as_numpy_iterator():
    print(sales_nums)

21
22
-108
9
40
53
-1
31


In [5]:
for sales in tf_dataset.take(3):
    print(sales.numpy())

21
22
-108


In [6]:
# filtering -ve values as the sales cannot be -ve
tf_dataset = tf_dataset.filter(lambda x: x>0)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
9
40
53
31


In [7]:
# convert sales in USD to INR
tf_dataset = tf_dataset.map(lambda x: x*85)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

1785
1870
765
3400
4505
2635


In [8]:
# shuffle the tf dataset
tf_dataset = tf_dataset.shuffle(buffer_size=3)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

1870
1785
765
4505
2635
3400


In [9]:
# batching
for sales_batch in tf_dataset.batch(4):
    print(sales_batch.numpy())

[1870  765 4505 1785]
[2635 3400]


In [10]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_data)

tf_dataset = tf_dataset.filter(lambda x: x>0).map(lambda y: y*85).shuffle(3).batch(2)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

[1870  765]
[1785 2635]
[4505 3400]


# Images
we are not doing any training or building model, we are just building a tensorflow input pipeline that we will be using for later works

In [12]:
images_ds = tf.data.Dataset.list_files("images/*/*",shuffle=False)
for files in images_ds.take(5):
    print(files)

tf.Tensor(b'images\\cat\\DE_Egypt5843MW_0.jpg', shape=(), dtype=string)
tf.Tensor(b'images\\cat\\FELV-cat.jpg', shape=(), dtype=string)
tf.Tensor(b'images\\cat\\Facts-About-Ginger-CatsHERO.jpg', shape=(), dtype=string)
tf.Tensor(b'images\\cat\\cat-quotes-1543599392.jpg', shape=(), dtype=string)
tf.Tensor(b'images\\cat\\cool_cat.jpg', shape=(), dtype=string)


In [13]:
images_ds = images_ds.shuffle(20)

for file in images_ds.take(3):
    print(file.numpy())

b'images\\cat\\cool_cat.jpg'
b'images\\cat\\DE_Egypt5843MW_0.jpg'
b'images\\cat\\hq720.jpg'


In [14]:
image_count = len(images_ds)
image_count

117

In [15]:
train_size = int(image_count*0.8) # take 80% of images for training

train_ds = images_ds.take(train_size)
test_ds = images_ds.skip(train_size)

In [16]:
len(train_ds)

93

In [17]:
len(test_ds)

24

In [18]:
s = 'images\\cat\\hq7202.jpg'

s.split("\\")[-2]

'cat'

In [19]:
import os
def get_label(filepath):
    # return filepath.split('\\')[-2], split does not work for tensor objects
    return tf.strings.split(filepath,os.path.sep)[-2]

In [20]:
def process_image(filepath):
    label = get_label(filepath)
    img = tf.io.read_file(filepath)
    img = tf.image.decode_jpeg(img)
    img = tf.image.resize(img,[128,128])
    
    return img,label

In [21]:
for i in train_ds.take(5):
    print(i)

tf.Tensor(b'images\\cat\\essay-wood-cats-149626371.jpg', shape=(), dtype=string)
tf.Tensor(b'images\\cat\\european-shorthair-8601492_640.jpg', shape=(), dtype=string)
tf.Tensor(b'images\\cat\\Facts-About-Ginger-CatsHERO.jpg', shape=(), dtype=string)
tf.Tensor(b'images\\cat\\gettyimages-1067956982.jpg', shape=(), dtype=string)
tf.Tensor(b'images\\cat\\image13.jpeg', shape=(), dtype=string)


In [22]:
train_ds = train_ds.map(process_image)
for img,label in train_ds.take(5):
    print("Image: ",img)
    print("Label: ",label)

Image:  tf.Tensor(
[[[ 33.       53.       62.     ]
  [ 33.       53.       62.     ]
  [ 33.       53.       62.     ]
  ...
  [112.      105.       95.     ]
  [112.      105.       95.     ]
  [112.      105.       95.     ]]

 [[ 33.       52.       66.     ]
  [ 33.       52.       66.     ]
  [ 33.       52.21875  65.5625 ]
  ...
  [110.      104.       98.     ]
  [110.      104.       98.     ]
  [111.      104.       98.     ]]

 [[ 32.       51.       66.     ]
  [ 32.9375   52.875    64.0625 ]
  [ 33.       53.       62.     ]
  ...
  [105.      106.      100.     ]
  [106.125   105.      100.     ]
  [109.      104.      100.     ]]

 ...

 [[  3.       15.       39.     ]
  [  7.28125  14.46875  40.40625]
  [ 16.5625   22.5625   48.5625 ]
  ...
  [ 28.9375   34.9375   50.9375 ]
  [ 36.21875  40.09375  52.09375]
  [ 49.65625  50.       56.96875]]

 [[  3.3125   14.3125   36.3125 ]
  [  3.625    14.625    36.625  ]
  [  6.34375  16.5      38.78125]
  ...
  [ 29.71875  31.28

In [23]:
def scale(image,label):
    return image/255,label

In [24]:
train_ds = train_ds.map(scale)
for image,label in train_ds.take(5):
    print("Image: ",image.numpy())
    print("Label: ",label.numpy())

Image:  [[[0.6039216  0.7894148  0.9613741 ]
  [0.6019608  0.7921569  0.96666664]
  [0.5931296  0.7931296  0.96175706]
  ...
  [0.2891927  0.45292586 0.17745864]
  [0.2647059  0.46413144 0.21004136]
  [0.35922182 0.57412684 0.4715916 ]]

 [[0.6051011  0.79725796 0.95804226]
  [0.6099954  0.79430914 0.96685815]
  [0.6098039  0.80196077 0.9666973 ]
  ...
  [0.26174173 0.42054993 0.20193015]
  [0.26059282 0.41707262 0.20846353]
  [0.24018842 0.46666667 0.3221737 ]]

 [[0.60706186 0.7952972  0.95804226]
  [0.6101869  0.8015778  0.9609758 ]
  [0.61179537 0.8019761  0.9657246 ]
  ...
  [0.28037685 0.44705883 0.26570925]
  [0.26489738 0.4855086  0.34944853]
  [0.26489738 0.5476486  0.49765626]]

 ...

 [[0.36373314 0.36000305 0.3764859 ]
  [0.2684819  0.27240348 0.2096584 ]
  [0.23426011 0.23920037 0.16371784]
  ...
  [0.59396446 0.524349   0.35280332]
  [0.6827359  0.60234374 0.4199908 ]
  [0.57372856 0.5158931  0.3463082 ]]

 [[0.4917739  0.5286458  0.63041514]
  [0.24265471 0.24949448 0.23

# Excercise

In [26]:
data = tf.data.Dataset.list_files('reviews/*/*',shuffle=False)
data

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [27]:
for text in data:
    print(text)

tf.Tensor(b'reviews\\negative\\neg_1.txt', shape=(), dtype=string)
tf.Tensor(b'reviews\\negative\\neg_2.txt', shape=(), dtype=string)
tf.Tensor(b'reviews\\negative\\neg_3.txt', shape=(), dtype=string)
tf.Tensor(b'reviews\\positive\\pos_1.txt', shape=(), dtype=string)
tf.Tensor(b'reviews\\positive\\pos_2.txt', shape=(), dtype=string)
tf.Tensor(b'reviews\\positive\\pos_3.txt', shape=(), dtype=string)


<h3>Reading text and labels from file</h3>

In [29]:
def getLabel(file_path):
    return tf.strings.split(file_path,os.path.sep)[-2]

In [30]:
def read_text(file_path):
    label = getLabel(file_path)
    text = tf.io.read_file(file_path)
    return text,label

In [31]:
data = data.map(read_text)

In [32]:
for text,label in data:
    print("Review: ",text.numpy())
    print("Label: ",label.numpy())

Review:  b"Basically there's a family where a little boy (Jake) thinks there's a zombie in his closet & his parents are fighting all the time.<br /><br />This movie is slower than a soap opera... and suddenly, Jake decides to become Rambo and kill the zombie.<br /><br />OK, first of all when you're going to make a film you must Decide if its a thriller or a drama! As a drama the movie is watchable. Parents are divorcing & arguing like in real life. And then we have Jake with his closet which totally ruins all the film! I expected to see a BOOGEYMAN similar movie, and instead i watched a drama with some meaningless thriller spots.<br /><br />3 out of 10 just for the well playing parents & descent dialogs. As for the shots with Jake: just ignore them.\n"
Label:  b'negative'
Review:  b"This show was an amazing, fresh & innovative idea in the 70's when it first aired. The first 7 or 8 years were brilliant, but things dropped off after that. By 1990, the show was not really funny anymore, a

<h3>Removing blank revies from the file</h3>

In [34]:
data = data.filter(lambda review,label: review != '')

In [72]:
for text,label in data.as_numpy_iterator():
    print("Review: ",text)
    print("Label: ",label)

Review:  b"One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me.<br /><br />The first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO. Trust me, this is not a show for the faint hearted or timid. This show pulls no punches with regards to drugs, sex or violence. Its is hardcore, in the classic use of the word.<br /><br />It is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentary. It focuses mainly on Emerald City, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. Em City is home to many..Aryans, Muslims, gangstas, Latinos, Christians, Italians, Irish and more....so scuffles, death stares, dodgy dealings and shady agreements are never far away.<br /><br />I would say the main appeal of the show is due

In [74]:
data = tf.data.Dataset.list_files("reviews/*/*",shuffle=False)

In [76]:
data = data.map(read_text).filter(lambda r,l: r!= "").shuffle(3)

In [80]:
for text,label in data.as_numpy_iterator():
    print("Review: ",text[:50])
    print("Label: ",label)

Review:  b'One of the other reviewers has mentioned that afte'
Label:  b'positive'
Review:  b'This show was an amazing, fresh & innovative idea '
Label:  b'negative'
Review:  b'A wonderful little production. <br /><br />The fil'
Label:  b'positive'
Review:  b"Basically there's a family where a little boy (Jak"
Label:  b'negative'
