In [22]:
import tensorflow as tf
import numpy as np
import os

In [2]:
daily_sales_number = [21,22,-108,31,-1,32,34,31]

In [3]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_number)
tf_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [4]:
# to see the indivisual elements in a tf dataset you have to iterate through the dataset and apply numpy() function

#method 1
for sales in tf_dataset:
    print(sales.numpy())

21
22
-108
31
-1
32
34
31


In [5]:
# method 2
for sale in tf_dataset.as_numpy_iterator():
    print(sale)

21
22
-108
31
-1
32
34
31


In [6]:
# if you want to print selectively first elements then use take() method

for sales in tf_dataset.take(3):
    print(sales.numpy())

21
22
-108


In [7]:
# if you want to filter some elements like here you want to remove those negative elements as negative sales doesn't exists

filter_dataset = tf_dataset.filter(lambda x: x>0)
for sales in filter_dataset.as_numpy_iterator():
    print(sales)

21
22
31
32
34
31


In [8]:
# if you want to do feature engineering in every element , you can use map function

converted_tp_BD_currency = filter_dataset.map(lambda x: x*120)
for sales in converted_tp_BD_currency:
    print(sales.numpy())

2520
2640
3720
3840
4080
3720


In [9]:
#if you want to shuffle elements us shuffle function

shuffled_dataset = converted_tp_BD_currency.shuffle(3)
for sales in shuffled_dataset:
    print(sales.numpy())

2640
3720
4080
3720
3840
2520


In [10]:
# you can also create batches of dataset

for sales in shuffled_dataset.batch(2):
    print(sales.numpy())

[2640 3840]
[2520 3720]
[4080 3720]


In [11]:
# Now you can do all these things in a single line

new_tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_number)

In [12]:
etl_dataset = new_tf_dataset.filter(lambda x: x>0).map(lambda y: y*120).shuffle(2).batch(2)
for sales in etl_dataset:
    print(sales.numpy())

[2640 3720]
[2520 3840]
[3720 4080]


In [13]:
image_ds = tf.data.Dataset.list_files('images/*/*', shuffle=False)
for file in image_ds.take(5):
    print(file.numpy())

b'images\\messi\\060724  Lionel Messi of Argentina looks 1920  .jpg'
b'images\\messi\\1200x675_cmsv2_397eddf0-1e7d-5271-b801-571fc21b14c1-8353416.jpg'
b'images\\messi\\1710795602070_e0b48d8c-087c-4220-b368-1047fc7dee76.jpg'
b'images\\messi\\17185783639125.jpg'
b'images\\messi\\17218817761201.jpg'


In [16]:
image_ds = image_ds.shuffle(200)
for file in image_ds.take(3):
    print(file.numpy())

b'images\\messi\\GettyImages-1666756533.jpg'
b'images\\messi\\17185783639125.jpg'
b'images\\ronaldo\\cristiano-ronaldo-al-nassr-2023-1692731063-114594.jpg'


In [17]:
image_count = len(image_ds)
image_count

89

In [18]:
train_size = int(image_count*0.8)

train_ds = image_ds.take(train_size)
test_ds = image_ds.skip(train_size)

In [20]:
len(train_ds), len(test_ds)

(71, 18)

In [23]:
def get_label(file_path):
    return tf.strings.split(file_path, os.path.sep)[-2]

In [31]:
def process_image(file_path):
    label = get_label(file_path)

    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [128,128])

    return img, label

In [35]:
train_ds = train_ds.map(process_image)

for img, label in train_ds.take(5):
    print(f"Image: {img}")
    print(f"label: {label}")

Image: [[[  1.5       5.34375  24.34375]
  [  3.09375  15.15625  34.34375]
  [  0.        0.       19.     ]
  ...
  [ 25.84375  42.84375  72.84375]
  [  0.       31.       56.     ]
  [  8.       34.       61.     ]]

 [[ 24.4375   31.4375   53.875  ]
  [  0.46875   0.59375  16.0625 ]
  [  0.        0.       19.     ]
  ...
  [ 23.0625   40.0625   70.0625 ]
  [ 10.5      37.03125  64.03125]
  [  1.90625  28.84375  55.84375]]

 [[  0.        3.71875  24.59375]
  [  1.        0.       19.125  ]
  [  0.        0.       19.     ]
  ...
  [ 16.0625   36.1875   65.625  ]
  [ 15.8125   34.8125   64.8125 ]
  [  0.       25.71875  52.71875]]

 ...

 [[ 84.21875 125.21875  55.21875]
  [ 83.0625  124.0625   56.0625 ]
  [ 82.4375  123.4375   54.5625 ]
  ...
  [ 68.84375 117.28125  29.96875]
  [ 67.3125  118.3125   25.3125 ]
  [ 68.3125  117.3125   26.3125 ]]

 [[ 71.03125 113.03125  40.90625]
  [ 76.9375  118.46875  47.40625]
  [ 68.25    110.25     38.125  ]
  ...
  [ 76.0625  125.0625   34.    

In [37]:
def scale(image, label):
    return image/255, label

In [38]:
train_ds = train_ds.map(scale)
for image, label in train_ds.take(5):
    print(f"Image: {image}")
    print(f"label: {label}")

Image: [[[0.29941788 0.31499502 0.3469401 ]
  [0.30803654 0.3238875  0.34390318]
  [0.31865233 0.33041704 0.35247588]
  ...
  [0.35936925 0.39466336 0.4534869 ]
  [0.34879556 0.39905024 0.4522059 ]
  [0.35629788 0.40479282 0.45897672]]

 [[0.27034312 0.30104166 0.33107767]
  [0.2791954  0.30664638 0.3301758 ]
  [0.28918505 0.31663603 0.34062117]
  ...
  [0.34087968 0.3958161  0.4467965 ]
  [0.34509805 0.39950982 0.4507353 ]
  [0.35121593 0.3984777  0.45327818]]

 [[0.24877451 0.28799018 0.3242896 ]
  [0.26418504 0.29841068 0.32532743]
  [0.2767157  0.30759802 0.3372549 ]
  ...
  [0.3372549  0.39215687 0.44313726]
  [0.3467697  0.3938285  0.44088733]
  [0.34730393 0.39436275 0.4492647 ]]

 ...

 [[0.51893955 0.36991996 0.4130572 ]
  [0.501011   0.35199142 0.39512867]
  [0.47340685 0.32953238 0.3587929 ]
  ...
  [0.4189951  0.3915441  0.36017156]
  [0.5760589  0.5682158  0.52115697]
  [0.67126226 0.655576   0.60851717]]

 [[0.5389189  0.38989928 0.43323952]
  [0.51366615 0.36464652 0.407