In [1]:
import tensorflow as tf

In [2]:
# Time windows
#
# Goal: train a model to forecast the next step of a series from a time
# window of 20 previous steps. The cells below build up the windowing
# pipeline on a tiny 10-element toy dataset.

# Dataset.range(10) gives an artificial dataset holding the integers 0..9.
dataset = tf.data.Dataset.range(10)
for element in dataset:
    # Each element is a scalar tensor; .numpy() unwraps it into a plain value.
    print(element.numpy())

0
1
2
3
4
5
6
7
8
9


In [3]:
# window(5, shift=1) groups the elements into windows of size 5; the `shift`
# parameter sets how far the start of the window moves from one window to the
# next. The result is a dataset of windows in which every window is itself a
# dataset, so two levels of iteration are needed to reach the values.
# Without drop_remainder, the windows near the end of the data hold fewer
# than 5 values.
dataset = tf.data.Dataset.range(10).window(5, shift=1)
for sub_dataset in dataset:
    # .numpy() converts each scalar tensor to a plain value. Every value is
    # followed by a single space, and print() terminates the row.
    print("".join(f"{item.numpy()} " for item in sub_dataset))

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 
6 7 8 9 
7 8 9 
8 9 
9 


In [4]:
# Many models need every example to have the same number of values.
# drop_remainder=True discards the trailing windows that would hold fewer
# than 5 values, so all remaining windows have equal size.
dataset = tf.data.Dataset.range(10).window(5, shift=1, drop_remainder=True)
for sub_dataset in dataset:
    # One row per window: each value followed by a space, then a newline.
    print("".join(f"{item.numpy()} " for item in sub_dataset))

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 


In [5]:
# A dataset of windows where each window is itself a dataset is awkward to
# consume; a single dataset whose elements are regular tensors is easier to
# work with. flat_map takes a function, applies it to every nested window
# dataset and flattens the results into one dataset. The function used here
# is batch(5): it takes 5 elements from the window (i.e. the whole window)
# and packs them into one tensor.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda sub_dataset: sub_dataset.batch(5))
)
for window_tensor in dataset:
    print(window_tensor.numpy())

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


In [6]:
# A machine learning model usually needs both inputs and labels. Because
# this is a time series, the first n-1 values of each window are used as the
# input and the last value as the label to predict.
# map() applies the lambda to every window tensor and turns it into a pair of
# tensors: one with the first n-1 elements and one with only the last element.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda sub_dataset: sub_dataset.batch(5))
    .map(lambda window_tensor: (window_tensor[:-1], window_tensor[-1:]))
)
for features, label in dataset:
    print(features.numpy(), label.numpy())

[0 1 2 3] [4]
[1 2 3 4] [5]
[2 3 4 5] [6]
[3 4 5 6] [7]
[4 5 6 7] [8]
[5 6 7 8] [9]


In [7]:
# Training usually needs shuffled data. Dataset.shuffle takes a buffer_size
# and randomly samples the elements from a buffer of that size.
# Shuffling the dataset instances helps make them Independent and Identically
# Distributed (IID), which matters when training with gradient descent.
# Only the order of the (input, label) pairs is shuffled; the values inside
# each window keep their order because this is a time series.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda sub_dataset: sub_dataset.batch(5))
    .map(lambda window_tensor: (window_tensor[:-1], window_tensor[-1:]))
    .shuffle(buffer_size=10)
)
for features, label in dataset:
    print(features.numpy(), label.numpy())

[2 3 4 5] [6]
[5 6 7 8] [9]
[3 4 5 6] [7]
[0 1 2 3] [4]
[4 5 6 7] [8]
[1 2 3 4] [5]


In [8]:
# batch(2) groups the examples into batches of two windows, one batch per
# training iteration. prefetch(1) prepares the next batch in parallel while
# the current one is being used, so the training device (e.g. the GPU) is
# kept busy instead of waiting for data.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda sub_dataset: sub_dataset.batch(5))
    .map(lambda window_tensor: (window_tensor[:-1], window_tensor[-1:]))
    .shuffle(buffer_size=10)
    .batch(2)
    .prefetch(1)
)
for features, labels in dataset:
    print("x =", features.numpy())
    print("y =", labels.numpy())

x = [[2 3 4 5]
 [0 1 2 3]]
y = [[6]
 [4]]
x = [[1 2 3 4]
 [5 6 7 8]]
y = [[5]
 [9]]
x = [[3 4 5 6]
 [4 5 6 7]]
y = [[7]
 [8]]


In [9]:
# final function for time window and dataset preparation
def window_dataset(series, window_size, batch_size=32,
                   shuffle_buffer=1000):
    """Convert a time series into a TensorFlow dataset usable for training.

    The series is cut into sliding windows of ``window_size + 1`` consecutive
    values (shift of 1, incomplete trailing windows dropped). The windows are
    shuffled, and each one is split into an ``(inputs, label)`` pair:
    ``inputs`` is the first ``window_size`` values and ``label`` is the single
    value that follows them.

    Args:
        series: The time series values, passed to
            ``tf.data.Dataset.from_tensor_slices``.
        window_size: Number of consecutive values used as the model input.
        batch_size: Number of ``(inputs, label)`` pairs per batch.
        shuffle_buffer: Buffer size passed to ``Dataset.shuffle``.

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``(inputs, label)`` pairs,
        with a prefetch of 1.

    Note:
        The hyperparameters (window size, batch size, shuffle buffer) will
        need tuning to suit your own data and model.
    """
    # One extra value per window: window_size inputs plus the label.
    span = window_size + 1
    return (
        tf.data.Dataset.from_tensor_slices(series)
        .window(span, shift=1, drop_remainder=True)
        .flat_map(lambda chunk: chunk.batch(span))
        .shuffle(shuffle_buffer)
        .map(lambda chunk: (chunk[:-1], chunk[-1]))
        .batch(batch_size)
        .prefetch(1)
    )