In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [1]:
# Ask for TensorFlow 2.x when the notebook runs in Colab. Any failure of the
# magic is deliberately swallowed so the cell is a no-op in other environments.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  # Best-effort: fall through and use whatever TensorFlow is installed.
  pass

In [2]:
import tensorflow as tf
# NOTE(review): np and plt are not referenced in the cells visible here —
# confirm whether later cells need them before removing.
import numpy as np
import matplotlib.pyplot as plt
# Show the installed TensorFlow version (the recorded run printed 2.4.1).
print(tf.__version__)

2.4.1


In [3]:
# Build a dataset holding the integers 0..9 and show one element per line.
dataset = tf.data.Dataset.range(10)
print(*(element.numpy() for element in dataset), sep="\n")

0
1
2
3
4
5
6
7
8
9


In [4]:
# Slide a window of 5 elements over 0..9, advancing one element per window.
# Without drop_remainder the trailing windows run out of data and come out
# shorter than 5 elements.
dataset = tf.data.Dataset.range(10).window(5, shift=1)
for window_dataset in dataset:
  # Each window is itself a dataset, so its elements are read one by one and
  # joined into a single space-separated line.
  print("".join(f"{val.numpy()} " for val in window_dataset))

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 
6 7 8 9 
7 8 9 
8 9 
9 


In [5]:
# drop_remainder=True discards the trailing windows that hold fewer than 5
# elements, so every remaining window is full.
dataset = tf.data.Dataset.range(10).window(5, shift=1, drop_remainder=True)
for window_dataset in dataset:
  # A window is a nested dataset and cannot be turned into a numpy array
  # directly, so each contained number is converted individually.
  print("".join(f"{val.numpy()} * " for val in window_dataset))

0 * 1 * 2 * 3 * 4 * 
1 * 2 * 3 * 4 * 5 * 
2 * 3 * 4 * 5 * 6 * 
3 * 4 * 5 * 6 * 7 * 
4 * 5 * 6 * 7 * 8 * 
5 * 6 * 7 * 8 * 9 * 


In [6]:
# Creating a sequence of ndarrays: collapse every window into one tensor so it
# can be printed as a single array.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    # Batching all 5 elements of a window yields one 5-element tensor;
    # flat_map merges the per-window datasets back into a single dataset.
    .flat_map(lambda win: win.batch(5))
)
for batched_window in dataset:
  print(batched_window.numpy())


[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


In [7]:
# Creating labels: the leading values of each window become the features and
# the final value becomes the label.

def split_window(window):
  """Split a batched window into (all values but the last, last value).

  The label is taken with a slice (`window[-1:]`), so it keeps a length-1
  dimension instead of collapsing to a scalar.
  """
  return window[:-1], window[-1:]

dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda win: win.batch(5))
    .map(split_window)
)
for features, label in dataset:
  print(features.numpy(), label.numpy())

[0 1 2 3] [4]
[1 2 3 4] [5]
[2 3 4 5] [6]
[3 4 5 6] [7]
[4 5 6 7] [8]
[5 6 7 8] [9]


In [17]:
# Shuffle the (features, label) pairs so they are no longer in sequence order.
window_size = 5    # values per window: 4 features followed by 1 label
shuffle_seed = 42  # fixed seed so Restart & Run All reproduces the same order

dataset = tf.data.Dataset.range(10)
dataset = dataset.window(window_size, shift=1, drop_remainder=True)
# (a) Collapse each window (a nested dataset) into one tensor of window_size
#     values; list(dataset.as_numpy_iterator()) would show the arrays here.
dataset = dataset.flat_map(lambda window: window.batch(window_size))
# (b) Slicing needs tensors: without step (a) this map fails with
#     "'_VariantDataset' object is not subscriptable".
dataset = dataset.map(lambda window: (window[:-1], window[-1:]))
# The buffer (10) is larger than the number of windows produced here, so the
# whole dataset takes part in the shuffle.
dataset = dataset.shuffle(buffer_size=10, seed=shuffle_seed)
for x, y in dataset:
  print(x.numpy(), y.numpy())


[5 6 7 8] [9]
[4 5 6 7] [8]
[0 1 2 3] [4]
[1 2 3 4] [5]
[3 4 5 6] [7]
[2 3 4 5] [6]


In [18]:
# Full input pipeline: window -> tensor -> (features, label) -> shuffle -> batch.
window_size = 5    # values per window: 4 features followed by 1 label
shuffle_seed = 42  # fixed seed so Restart & Run All reproduces the same order

dataset = tf.data.Dataset.range(10)
dataset = dataset.window(window_size, shift=1, drop_remainder=True)
# Collapse each window (a nested dataset) into one tensor so it can be sliced.
dataset = dataset.flat_map(lambda window: window.batch(window_size))
# Split every window into its leading values (x) and its final value (y).
dataset = dataset.map(lambda window: (window[:-1], window[-1:]))
dataset = dataset.shuffle(buffer_size=10, seed=shuffle_seed)
# Group the pairs two at a time. prefetch(1) lets the pipeline prepare the next
# batch while the current one is being consumed; it does not change the data.
dataset = dataset.batch(2).prefetch(1)
for x, y in dataset:
  print("x = ", x.numpy())
  print("y = ", y.numpy())


x =  [[2 3 4 5]
 [5 6 7 8]]
y =  [[6]
 [9]]
x =  [[0 1 2 3]
 [3 4 5 6]]
y =  [[4]
 [7]]
x =  [[1 2 3 4]
 [4 5 6 7]]
y =  [[5]
 [8]]
