# Preparing features and labels for Machine Learning

![ml](data/ml.png)

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [9]:
# Source dataset: the integers 0 through 9, one per element.
dataset = tf.data.Dataset.range(10)

# Each element comes back as a tensor; .numpy() unwraps it so that the
# plain value is printed.
for element in dataset:
    value = element.numpy()
    print(value)

0
1
2
3
4
5
6
7
8
9


In [13]:
# Slide a window of up to 5 elements over the range, advancing one element
# per step. Without drop_remainder the trailing windows come out shorter
# than 5.
dataset = tf.data.Dataset.range(10).window(5, shift=1)

# Every element of a windowed dataset is itself a dataset, so each window
# has to be iterated in turn. A row is the window's values, each followed
# by a single space.
for window in dataset:
    row = "".join(str(item.numpy()) + " " for item in window)
    print(row)

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 
6 7 8 9 
7 8 9 
8 9 
9 


In [14]:
# Same sliding window as before, but drop_remainder=True discards the
# incomplete windows at the end so every window has exactly 5 elements.
dataset = tf.data.Dataset.range(10).window(5, shift=1, drop_remainder=True)

# Print each window on its own line, values separated (and followed) by a
# single space.
for window in dataset:
    row = "".join(str(item.numpy()) + " " for item in window)
    print(row)

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 


In [16]:
# Build fixed-size windows, then flatten the dataset-of-datasets:
# batching each 5-element window with batch(5) collapses it into a single
# tensor, and flat_map splices those tensors back into one flat dataset.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
)

# Each element is now one tensor per window; .numpy() converts it to an
# array for printing.
for window_tensor in dataset:
    print(window_tensor.numpy())

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


In [17]:
# Windows of 5 flattened to tensors, then split into (features, label):
# everything except the last value is the feature vector, and the last
# value (kept as a length-1 slice) is the label.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(lambda w: (w[:-1], w[-1:]))
)

# Show each feature vector next to its label.
for features, label in dataset:
    print(features.numpy(), label.numpy())

[0 1 2 3] [4]
[1 2 3 4] [5]
[2 3 4 5] [6]
[3 4 5 6] [7]
[4 5 6 7] [8]
[5 6 7 8] [9]


In [18]:
# Same (features, label) pipeline, followed by a shuffle. The shuffle
# buffer of 10 is larger than the 6 windows produced here, so all of them
# fit in the buffer; the printed order varies from run to run.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(lambda w: (w[:-1], w[-1:]))
    .shuffle(buffer_size=10)
)

# Show each feature vector next to its label, in shuffled order.
for features, label in dataset:
    print(features.numpy(), label.numpy())

[5 6 7 8] [9]
[2 3 4 5] [6]
[1 2 3 4] [5]
[0 1 2 3] [4]
[4 5 6 7] [8]
[3 4 5 6] [7]


In [19]:
# Full input pipeline: window -> flatten -> (features, label) -> shuffle,
# then group the examples two at a time with batch(2). prefetch(1) asks
# tf.data to keep one batch ready ahead of the consumer.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(lambda w: (w[:-1], w[-1:]))
    .shuffle(buffer_size=10)
    .batch(2)
    .prefetch(1)
)

# Each iteration yields one batch: a stack of feature vectors and the
# matching stack of labels.
for features, labels in dataset:
    print("x = ", features.numpy())
    print("y = ", labels.numpy())

x =  [[1 2 3 4]
 [0 1 2 3]]
y =  [[5]
 [4]]
x =  [[3 4 5 6]
 [4 5 6 7]]
y =  [[7]
 [8]]
x =  [[2 3 4 5]
 [5 6 7 8]]
y =  [[6]
 [9]]


#### The `tf.data.Dataset` API supports writing descriptive and efficient input pipelines. `Dataset` usage follows a common pattern:
- Create a source dataset from your input data.
- Apply dataset transformations to preprocess the data.
- Iterate over the dataset and process the elements.

Documentation: https://www.tensorflow.org/api_docs/python/tf/data/Dataset
