In [2]:
import tensorflow as tf

In [4]:
sales_data = [10,-20,33,120,-40,4,65,90,-23,83,-1,34,-31]


In [5]:
# Saving the data into dataset

tf_dataset = tf.data.Dataset.from_tensor_slices(sales_data)

In [6]:
tf_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [7]:
# Printing the dataset

for sales in tf_dataset:
    print(sales)

tf.Tensor(10, shape=(), dtype=int32)
tf.Tensor(-20, shape=(), dtype=int32)
tf.Tensor(33, shape=(), dtype=int32)
tf.Tensor(120, shape=(), dtype=int32)
tf.Tensor(-40, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(65, shape=(), dtype=int32)
tf.Tensor(90, shape=(), dtype=int32)
tf.Tensor(-23, shape=(), dtype=int32)
tf.Tensor(83, shape=(), dtype=int32)
tf.Tensor(-1, shape=(), dtype=int32)
tf.Tensor(34, shape=(), dtype=int32)
tf.Tensor(-31, shape=(), dtype=int32)


In [8]:
type(tf_dataset)

tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset

In [9]:
for sales in tf_dataset:
    print(sales.numpy())

10
-20
33
120
-40
4
65
90
-23
83
-1
34
-31


In [10]:
# Lets use filter function to remove negative numbers as sales cannot be negative

tf_dataset = tf_dataset.filter(lambda x:x>0)

for sales in tf_dataset:
    print(sales.numpy())

10
33
120
4
65
90
83
34


In [11]:
# Now lets consider all these sales are in US dollars and I need to convert it into rupees so I need to multiply it with 88

tf_dataset = tf_dataset.map(lambda x:x*88)

In [12]:
for sales in tf_dataset:
    print(sales.numpy())

880
2904
10560
352
5720
7920
7304
2992


In [13]:
# Also lets shyffle the sales 

tf_dataset = tf_dataset.shuffle(3)
for sales in tf_dataset:
    print(sales.numpy())

2904
352
5720
10560
880
7304
7920
2992


In [14]:
# Now lets do all the operations in one line

new_sales_data = [1,2,3,4,5,6,7,8,9,-10,-20,-30,-40]

new_tf_dataset = tf.data.Dataset.from_tensor_slices(new_sales_data)

new_tf_dataset = new_tf_dataset.filter(lambda x:x>0).map(lambda y:y*88).shuffle(3)

In [15]:
for sale in new_tf_dataset:
    print(sale.numpy())

88
264
176
440
616
352
528
704
792


In [16]:
# Now lets do the same with some images

tf_images = tf.data.Dataset.list_files("data/images/*/*", shuffle=False)

In [17]:
for items in tf_images.take(5):
    print(items.numpy())

b'data\\images\\cat\\20 Reasons Why Cats Make the Best Pets....jpg'
b'data\\images\\cat\\7 Foods Your Cat Can_t Eat.jpg'
b'data\\images\\cat\\A cat appears to have caught the....jpg'
b'data\\images\\cat\\Adopt-A-Cat Month\xc2\xae - American Humane....jpg'
b'data\\images\\cat\\All About Your Cat_s Tongue.jpg'


In [18]:
# Now lets shuffle the dataset as we need to divide it into train and test dataset

tf_images = tf_images.shuffle(200)
for items in tf_images.take(10):
    print(items.numpy())

b'data\\images\\dog\\11 Things Humans Do That Dogs Hate.jpg'
b'data\\images\\cat\\Soon_ the internet will make its own....jpg'
b'data\\images\\cat\\What to do if your cat is marking....jpg'
b'data\\images\\dog\\356 Free Dog Stock Photos - CC0 Images.jpg'
b'data\\images\\dog\\scientists explain puppy dog eyes....jpg'
b'data\\images\\dog\\45 Best Large Dog Breeds - Top Big Dogs_yyth....jpg'
b'data\\images\\dog\\8 amazing Indian dog breeds that....png'
b'data\\images\\dog\\List of Dog Breeds _ Petfinder.jpg'
b'data\\images\\dog\\AKC Pet Insurance _ Health Insurance....png'
b'data\\images\\dog\\Trained dogs can smell coronavirus in....jpg'


In [19]:
# Now lets split the dataset into train and test

size = len(tf_images)
print(size)

130


In [20]:
# we will divide the train test into 80 - 20 split

train_size = int(size*0.8)

train_ds = tf_images.take(train_size)
test_ds = tf_images.skip(train_size)

In [21]:
print("Size of train and test is", len(train_ds), "and", len(test_ds))

Size of train and test is 104 and 26


In [22]:
my_s = "b'data\\images\\dog\\Welcoming Your Adopted Dog Into Your....jpg'"

print(my_s.split("\\"))

["b'data", 'images', 'dog', "Welcoming Your Adopted Dog Into Your....jpg'"]


In [23]:
my_s.split("\\")[-2]

'dog'

In [24]:
# Now for training we need the labels as well
# we have two labes cat and dog but how we will retrieve it
# Simple we will get it from the folders in which the individual images are saved in 

# For example
my_s = "b'data\\images\\dog\\Welcoming Your Adopted Dog Into Your....jpg'"
print(my_s.split("\\"))
print(my_s.split("\\")[-2])


["b'data", 'images', 'dog', "Welcoming Your Adopted Dog Into Your....jpg'"]
dog


In [25]:
# Now lets create a function to do that

def get_labels():

    ans = []
    for images in tf_images:
        actual_labels = str(images.numpy()).split("\\\\")[-2]
        ans.append(actual_labels)
    print("Labels created successfully")
    return ans




In [26]:
labels = get_labels()

Labels created successfully


In [27]:
print(labels[:10])
print(len(labels))

['dog', 'cat', 'cat', 'dog', 'dog', 'dog', 'dog', 'dog', 'cat', 'dog']
130


In [28]:
# Now lets modify this function to create labels for train and test


def train_test_labels(ds):
    ans = []
    for images in ds:
        actual_labels = str(images.numpy()).split("\\\\")[-2]
        ans.append(actual_labels)
    print("Labels created successfully")
    return ans
    
    

In [29]:
train_labels = train_test_labels(train_ds)

Labels created successfully


In [30]:
print(len(train_labels))

104


In [31]:
test_labels = train_test_labels(test_ds)

Labels created successfully


In [32]:
print(len(test_labels))

26


In [93]:
# Now lets apply the same pipeline to images
def process_images():

    for items in tf_images.take(3):
        img = tf.image.decode_jpeg(items.numpy())
        print(img)
        
        

In [121]:
def get_label(file_path):
    import os
    parts = tf.strings.split(file_path, os.path.sep)
    return parts[-2]

In [122]:
get_label("data\\images\\dog\\20 Reasons Why Cats Make the Best Pets....jpg")

<tf.Tensor: shape=(), dtype=string, numpy=b'dog'>

In [123]:
def process_image(file_path):
    label = get_label(file_path)
    img = tf.io.read_file(file_path) # load the raw data from the file as a string
    img = tf.image.decode_jpeg(img)
    img = tf.image.resize(img, [128, 128])
    return img, label

In [126]:
img, label = process_image("data\\images\\cat\\20 Reasons Why Cats Make the Best Pets....jpg")
img.numpy()[:2]

array([[[155.      , 186.      , 215.      ],
        [156.      , 187.      , 216.      ],
        [158.      , 189.      , 218.      ],
        [160.0039  , 189.0039  , 219.0039  ],
        [161.0039  , 190.0039  , 220.0039  ],
        [162.      , 191.      , 221.      ],
        [166.      , 193.      , 222.      ],
        [167.      , 194.      , 223.      ],
        [168.      , 195.      , 224.      ],
        [169.      , 196.      , 225.      ],
        [170.      , 197.      , 224.      ],
        [170.0039  , 197.0039  , 224.0039  ],
        [172.      , 199.      , 226.      ],
        [173.0039  , 199.0039  , 224.0039  ],
        [174.0039  , 200.0039  , 225.0039  ],
        [175.0039  , 201.0039  , 226.0039  ],
        [176.      , 202.      , 227.      ],
        [177.0039  , 203.0039  , 228.0039  ],
        [177.0039  , 203.0039  , 228.0039  ],
        [179.0039  , 203.0039  , 227.0039  ],
        [180.0039  , 204.0039  , 228.0039  ],
        [180.0039  , 204.0039  , 2

In [127]:
my_train = train_ds.map(process_image)

In [132]:
for i in my_train.take(1):
    print(i)

(<tf.Tensor: shape=(128, 128, 3), dtype=float32, numpy=
array([[[ 27.960938  ,  41.960938  ,   7.8984375 ],
        [ 76.53308   , 104.23547   ,   3.74646   ],
        [ 47.391235  ,  65.28186   ,   0.        ],
        ...,
        [ 20.742188  ,  38.742188  ,  14.615967  ],
        [ 19.296509  ,  34.405518  ,   2.7034912 ],
        [ 27.496216  ,  47.749878  ,   5.7072754 ]],

       [[ 23.094116  ,  37.562866  ,   0.6878662 ],
        [ 24.03125   ,  39.170776  ,   4.3013916 ],
        [ 10.427612  ,  26.529663  ,   0.84558105],
        ...,
        [ 48.42859   ,  71.79004   ,  16.653076  ],
        [ 18.929688  ,  44.153442  ,   2.199829  ],
        [ 55.457764  ,  94.08203   ,   2.2180176 ]],

       [[ 15.507202  ,  35.057983  ,   0.9753418 ],
        [ 49.307007  ,  77.95752   ,   0.        ],
        [ 61.085083  ,  90.08508   ,   0.6311035 ],
        ...,
        [ 51.35376   ,  74.27173   ,   0.274292  ],
        [ 66.38257   ,  95.091675  ,   0.7369385 ],
        [ 94.6699

In [134]:
for image, label in my_train.take(5):
    print("****Image: ",image.numpy()[0][0])
    print("****Label: ",label.numpy())

****Image:  [219.4961 201.4961 177.4961]
****Label:  b'dog'
****Image:  [155. 186. 215.]
****Label:  b'cat'
****Image:  [211.1836 209.1836 196.1836]
****Label:  b'cat'
****Image:  [113.  73.  12.]
****Label:  b'dog'
****Image:  [ 71. 112.  76.   0.]
****Label:  b'cat'


In [137]:
for img, lab in train_ds.map(process_image).take(5):
    print("Image**",img[0][0])
    print("Label**",lab)
    

Image** tf.Tensor([15. 12.  5.], shape=(3,), dtype=float32)
Label** tf.Tensor(b'cat', shape=(), dtype=string)
Image** tf.Tensor([197.28906 206.28906 201.75781], shape=(3,), dtype=float32)
Label** tf.Tensor(b'cat', shape=(), dtype=string)
Image** tf.Tensor([38.08435  50.112427 36.09839 ], shape=(3,), dtype=float32)
Label** tf.Tensor(b'cat', shape=(), dtype=string)
Image** tf.Tensor([254. 254. 254.], shape=(3,), dtype=float32)
Label** tf.Tensor(b'dog', shape=(), dtype=string)
Image** tf.Tensor([205.44531 191.44531 190.44531], shape=(3,), dtype=float32)
Label** tf.Tensor(b'dog', shape=(), dtype=string)


In [138]:
def scale_image(image, label):
    image = image/255
    return image, label

In [139]:
# Now lets scale the dataset

scaled_trained_ds = my_train.map(scale_image)

In [153]:
for img, lab in scaled_trained_ds.take(5):
    print("Image**",img[0][0])
    print("Label**",lab)
    
    

Image** tf.Tensor([0.5058824  0.5764706  0.24705882], shape=(3,), dtype=float32)
Label** tf.Tensor(b'cat', shape=(), dtype=string)
Image** tf.Tensor([0.62766546 0.5771446  0.5487132 ], shape=(3,), dtype=float32)
Label** tf.Tensor(b'cat', shape=(), dtype=string)
Image** tf.Tensor([0.04113051 0.00382966 0.00229779], shape=(3,), dtype=float32)
Label** tf.Tensor(b'dog', shape=(), dtype=string)
Image** tf.Tensor([0.32156864 0.32156864 0.32156864 1.        ], shape=(4,), dtype=float32)
Label** tf.Tensor(b'cat', shape=(), dtype=string)
Image** tf.Tensor([0.49607843 0.4254902  0.38627452], shape=(3,), dtype=float32)
Label** tf.Tensor(b'cat', shape=(), dtype=string)


In [150]:
for i in my_train.take(1):
    print(i)
    

(<tf.Tensor: shape=(128, 128, 3), dtype=float32, numpy=
array([[[38.40625 , 47.5     , 51.828125],
        [32.9375  , 39.9375  , 45.9375  ],
        [35.765625, 42.765625, 48.765625],
        ...,
        [39.171875, 49.421875, 57.671875],
        [43.1875  , 54.6875  , 60.6875  ],
        [49.59375 , 60.59375 , 64.59375 ]],

       [[41.296875, 50.296875, 55.296875],
        [39.1875  , 46.1875  , 52.1875  ],
        [37.1875  , 44.1875  , 50.1875  ],
        ...,
        [34.3125  , 44.3125  , 53.3125  ],
        [38.671875, 49.671875, 55.671875],
        [39.890625, 50.890625, 56.890625]],

       [[39.34375 , 48.96875 , 53.65625 ],
        [38.703125, 47.703125, 54.703125],
        [36.265625, 45.265625, 50.265625],
        ...,
        [39.6875  , 49.6875  , 58.6875  ],
        [41.015625, 54.015625, 62.015625],
        [42.4375  , 52.4375  , 61.4375  ]],

       ...,

       [[80.046875, 76.75    , 75.515625],
        [86.296875, 82.296875, 81.296875],
        [95.984375, 86.984