#Imports

In [1]:
import tensorflow as tf

#Create a Simple Dataset

In [2]:
# Build a dataset containing the integers 0 through 9.
dataset = tf.data.Dataset.range(10)

# Display every element, converted from a tensor to its NumPy value.
for element in dataset:
    print(element.numpy())

0
1
2
3
4
5
6
7
8
9


#Windowing the data

In [4]:
# Build the 0-9 dataset and cut it into windows of up to 5 elements,
# sliding forward by one element for each new window.
dataset = tf.data.Dataset.range(10).window(size=5, shift=1)

# Every window is itself a nested dataset, so iterate it to collect its
# elements as NumPy values before printing.
for window in dataset:
    values = [element.numpy() for element in window]
    print(values)

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]
[6, 7, 8, 9]
[7, 8, 9]
[8, 9]
[9]


We can pass `drop_remainder=True` to keep only the windows that contain exactly 5 elements.

In [5]:
# Build the 0-9 dataset and window it, keeping only complete windows.
dataset = tf.data.Dataset.range(10).window(
    size=5,
    shift=1,
    drop_remainder=True,  # discard trailing windows with fewer than 5 elements
)

# Print each window's elements as a list of NumPy values.
for window in dataset:
    values = [element.numpy() for element in window]
    print(values)

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]


#Flatten the windows

Each window is itself a `dataset` structure. We convert the windows to tensors using the `flat_map()` method: the function we pass is applied to each window, and the results are flattened into a single dataset.

In [7]:
# Pipeline: integers 0-9 -> complete windows of 5 -> one tensor per window.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    # Batching a 5-element window with batch size 5 yields a single
    # 5-element tensor; flat_map merges those results into one flat dataset.
    .flat_map(lambda w: w.batch(5))
)

# Each element is now a tensor, so it converts to NumPy directly.
for flat_window in dataset:
    print(flat_window.numpy())

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


#Group into features and labels

In [8]:
# Pipeline: integers 0-9 -> complete windows of 5 -> one tensor per window
# -> (features, label) tuples.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    # Features are every element except the last; the label is the last one.
    .map(lambda w: (w[:-1], w[-1]))
)

# Show each feature/label pair, separated by a blank line.
for features, label in dataset:
    print("x = ", features.numpy())
    print("y = ", label.numpy())
    print()

x =  [0 1 2 3]
y =  4

x =  [1 2 3 4]
y =  5

x =  [2 3 4 5]
y =  6

x =  [3 4 5 6]
y =  7

x =  [4 5 6 7]
y =  8

x =  [5 6 7 8]
y =  9



#Shuffle data

In [10]:
# Pipeline: integers 0-9 -> complete windows of 5 -> one tensor per window
# -> (features, label) tuples -> shuffled order.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(lambda w: (w[:-1], w[-1]))
    # Randomize the order of the pairs using a 10-element shuffle buffer.
    .shuffle(buffer_size=10)
)

# Show each feature/label pair; the order differs from run to run.
for features, label in dataset:
    print("x = ", features.numpy())
    print("y = ", label.numpy())
    print()


x =  [0 1 2 3]
y =  4

x =  [2 3 4 5]
y =  6

x =  [5 6 7 8]
y =  9

x =  [3 4 5 6]
y =  7

x =  [4 5 6 7]
y =  8

x =  [1 2 3 4]
y =  5



#Create batches for training

Lastly, you will want to group your windows into batches. You can do that with the batch() method as shown below.
- Simply specify the batch size and it will return a **batched dataset in which each batch contains that number of windows**.
- As a rule of thumb, it is also good to add a prefetch() step. This reduces execution time while the model is training: by specifying a prefetch buffer_size of 1 as shown below, TensorFlow will prepare the next batch in advance (i.e. put it in a buffer) while the current batch is being consumed by the model. You can read more about it in the TensorFlow `tf.data` performance guide.

In [11]:
# Full input pipeline: integers 0-9 -> complete windows of 5 -> one tensor
# per window -> (features, label) tuples -> shuffled -> batched.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(lambda w: (w[:-1], w[-1]))
    .shuffle(buffer_size=10)
    # Group the pairs two at a time and keep one batch prefetched.
    .batch(2)
    .prefetch(1)
)

# Show each batch of features and labels, separated by a blank line.
for features, labels in dataset:
    print("x = ", features.numpy())
    print("y = ", labels.numpy())
    print()



x =  [[2 3 4 5]
 [3 4 5 6]]
y =  [6 7]

x =  [[4 5 6 7]
 [1 2 3 4]]
y =  [8 5]

x =  [[5 6 7 8]
 [0 1 2 3]]
y =  [9 4]

