<a href="https://colab.research.google.com/github/skhazaei/TensorFlow-repo/blob/master/data_series_into_windows.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Prepare features and labels:
Use the `tf.data.Dataset` class to create a simple dataset with the `range()` method.

In [5]:
import tensorflow as tf

# Toy series: a dataset whose elements are the scalar int64 values 0 through 9.
dataset = tf.data.Dataset.range(10)
# Printing the dataset object shows its element shapes/types, not its contents;
# iterate over it to see the actual values.
print(dataset)

<RangeDataset shapes: (), types: tf.int64>


In [6]:
# Iterating the dataset yields one scalar tf.Tensor per element.
for element in dataset:
  print(element)

tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)
tf.Tensor(3, shape=(), dtype=int64)
tf.Tensor(4, shape=(), dtype=int64)
tf.Tensor(5, shape=(), dtype=int64)
tf.Tensor(6, shape=(), dtype=int64)
tf.Tensor(7, shape=(), dtype=int64)
tf.Tensor(8, shape=(), dtype=int64)
tf.Tensor(9, shape=(), dtype=int64)


In [7]:
# Convert every tensor to its NumPy value so that only the number is printed.
for element in dataset:
  number = element.numpy()
  print(number)

0
1
2
3
4
5
6
7
8
9


## Use the `Dataset.window()` method to expand the dataset: window the data into chunks of five items, shifting by one each time.

In [23]:
# Slide a five-element window over 0..9, advancing one element at a time.
dataset = tf.data.Dataset.range(10).window(size=5, shift=1)
# Each element of the windowed dataset is itself a (nested) dataset, so
# printing it shows a dataset object rather than the numbers inside it.
for sub_dataset in dataset:
  print(sub_dataset)

<_VariantDataset shapes: (), types: tf.int64>
<_VariantDataset shapes: (), types: tf.int64>
<_VariantDataset shapes: (), types: tf.int64>
<_VariantDataset shapes: (), types: tf.int64>
<_VariantDataset shapes: (), types: tf.int64>
<_VariantDataset shapes: (), types: tf.int64>
<_VariantDataset shapes: (), types: tf.int64>
<_VariantDataset shapes: (), types: tf.int64>
<_VariantDataset shapes: (), types: tf.int64>
<_VariantDataset shapes: (), types: tf.int64>


In [24]:
# Print every window on its own line, with its values written back to back.
for sub_dataset in dataset:
  print(''.join(str(item.numpy()) for item in sub_dataset))

01234
12345
23456
34567
45678
56789
6789
789
89
9


## Use the `drop_remainder` parameter to drop the trailing windows that contain fewer than five items, so that every window has the same size.

In [25]:
import tensorflow as tf

# Same sliding window as before, but keep only the full five-element windows.
dataset = tf.data.Dataset.range(10).window(size=5, shift=1, drop_remainder=True)
# Print every window on its own line, with its values written back to back.
for sub_dataset in dataset:
  print(''.join(str(item.numpy()) for item in sub_dataset))


01234
12345
23456
34567
45678
56789


## Flatten each window into a single tensor using `flat_map(map_func)`: batching a window by its own size (five) turns it into one five-element tensor, which prints as a list in square brackets.

In [26]:
# Build the windows, then collapse each nested window dataset into one tensor
# by batching it with a batch size equal to the window size.
dataset = (
  tf.data.Dataset.range(10)
  .window(size=5, shift=1, drop_remainder=True)
  .flat_map(lambda sub_dataset: sub_dataset.batch(5))
)
for batched_window in dataset:
  print(batched_window)

tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int64)
tf.Tensor([1 2 3 4 5], shape=(5,), dtype=int64)
tf.Tensor([2 3 4 5 6], shape=(5,), dtype=int64)
tf.Tensor([3 4 5 6 7], shape=(5,), dtype=int64)
tf.Tensor([4 5 6 7 8], shape=(5,), dtype=int64)
tf.Tensor([5 6 7 8 9], shape=(5,), dtype=int64)


In [27]:
# Show every flattened window as a plain NumPy array.
for batched_window in dataset:
  print(batched_window.numpy())

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


## Now split each window into features and a label, namely x and y: the first four values of a window are the features and the last value is the label.

In [32]:
# Windows of five values, flattened to tensors, then split into
# (all values except the last, the last value kept as a length-1 slice).
dataset = (
  tf.data.Dataset.range(10)
  .window(size=5, shift=1, drop_remainder=True)
  .flat_map(lambda sub_dataset: sub_dataset.batch(5))
  .map(lambda values: (values[:-1], values[-1:]))
)
for features, label in dataset:
  print(features, label)

tf.Tensor([0 1 2 3], shape=(4,), dtype=int64) tf.Tensor([4], shape=(1,), dtype=int64)
tf.Tensor([1 2 3 4], shape=(4,), dtype=int64) tf.Tensor([5], shape=(1,), dtype=int64)
tf.Tensor([2 3 4 5], shape=(4,), dtype=int64) tf.Tensor([6], shape=(1,), dtype=int64)
tf.Tensor([3 4 5 6], shape=(4,), dtype=int64) tf.Tensor([7], shape=(1,), dtype=int64)
tf.Tensor([4 5 6 7], shape=(4,), dtype=int64) tf.Tensor([8], shape=(1,), dtype=int64)
tf.Tensor([5 6 7 8], shape=(4,), dtype=int64) tf.Tensor([9], shape=(1,), dtype=int64)


In [33]:
# Print each (features, label) pair as NumPy arrays.
for features, label in dataset:
  print(features.numpy(), label.numpy())

[0 1 2 3] [4]
[1 2 3 4] [5]
[2 3 4 5] [6]
[3 4 5 6] [7]
[4 5 6 7] [8]
[5 6 7 8] [9]


## Use the `shuffle(buffer_size)` method to shuffle data before training. This helps to rearrange the data so as not to accidentally introduce a sequence bias.

In [35]:
# Same (features, label) pipeline as before, followed by a shuffle so the
# pairs are no longer delivered in their original sequence order.
dataset = (
  tf.data.Dataset.range(10)
  .window(size=5, shift=1, drop_remainder=True)
  .flat_map(lambda sub_dataset: sub_dataset.batch(5))
  .map(lambda values: (values[:-1], values[-1:]))
  .shuffle(buffer_size=10)
)

In [36]:
# First pass over the shuffled dataset.
for features, label in dataset:
  print(features.numpy(), label.numpy())

[1 2 3 4] [5]
[2 3 4 5] [6]
[4 5 6 7] [8]
[5 6 7 8] [9]
[0 1 2 3] [4]
[3 4 5 6] [7]


In [38]:
# Second pass over the same dataset: the pairs come out in a different order.
for features, label in dataset:
  print(features.numpy(), label.numpy())

[2 3 4 5] [6]
[4 5 6 7] [8]
[5 6 7 8] [9]
[0 1 2 3] [4]
[3 4 5 6] [7]
[1 2 3 4] [5]


## Use the `batch()` method to batch the dataset.

By setting a batch size of two, our data gets batched into two x's and two y's at a time.

In [40]:
# Named sizes so that the window length and the per-window batch below cannot
# drift apart: each window must be collapsed by a batch of exactly its own size,
# otherwise a window would be split into several tensors.
SERIES_LENGTH = 10
WINDOW_SIZE = 5
SHUFFLE_BUFFER_SIZE = 10
BATCH_SIZE = 2

dataset = tf.data.Dataset.range(SERIES_LENGTH)
dataset = dataset.window(size=WINDOW_SIZE, shift=1, drop_remainder=True)
# Collapse every window (a nested dataset) into a single tensor of WINDOW_SIZE values.
dataset = dataset.flat_map(lambda window: window.batch(WINDOW_SIZE))
# Features are all values but the last; the label is the last value, kept as a length-1 slice.
dataset = dataset.map(lambda window: (window[:-1], window[-1:]))
dataset = dataset.shuffle(buffer_size=SHUFFLE_BUFFER_SIZE)
# Group the (features, label) pairs BATCH_SIZE at a time; prefetch(1) lets
# tf.data prepare one batch ahead of the consumer.
dataset = dataset.batch(BATCH_SIZE).prefetch(1)

In [50]:
# First pass over the batched dataset: each step yields two feature rows
# together with their two labels.
for features, labels in dataset:
  print("x= ", features.numpy())
  print("y= ", labels.numpy(), '\n\n')

x=  [[3 4 5 6]
 [0 1 2 3]]
y=  [[7]
 [4]] 


x=  [[5 6 7 8]
 [1 2 3 4]]
y=  [[9]
 [5]] 


x=  [[4 5 6 7]
 [2 3 4 5]]
y=  [[8]
 [6]] 




In [51]:
# Second pass over the batched dataset: the pairs are grouped differently
# because the data is shuffled before it is batched.
for features, labels in dataset:
  print("x= ", features.numpy())
  print("y= ", labels.numpy(), '\n\n')

x=  [[0 1 2 3]
 [1 2 3 4]]
y=  [[4]
 [5]] 


x=  [[3 4 5 6]
 [2 3 4 5]]
y=  [[7]
 [6]] 


x=  [[5 6 7 8]
 [4 5 6 7]]
y=  [[9]
 [8]] 


