In [2]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import tqdm     # loop progress bar

In [3]:
# Ask TensorFlow to grow GPU memory on demand instead of pre-allocating it.
# The setting is applied to every visible GPU: TensorFlow requires memory
# growth to be configured consistently across GPUs, and iterating (rather than
# indexing [0]) also keeps this cell runnable on a CPU-only machine, where the
# device list is empty.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

In [4]:
# Dataset identifier; it is substituted into the training CSV path
# "data/{}_train_set.csv" when the raw token sequence is loaded.
dataset_name = "SEG_wavenet"

In [5]:
# Configuration for the non-scalar input encoding.
# NOTE(review): "not scalar" presumably means inputs are fed as one-hot rows
# rather than scalar token ids — confirm against the model definition.

# Window length: each input example spans this many consecutive tokens, and the
# first target is the token at this offset in the training sequence.
receptive_field = 257

# Width of every one-hot row (last dimension of each sparse dense_shape below).
vocab_size = 16293

In [6]:
# Load the raw training sequence for the selected dataset as a 64-bit integer
# array, using a newline as the field delimiter.
train_set_original = np.genfromtxt(
    fname="data/{}_train_set.csv".format(dataset_name),
    delimiter="\n",
    dtype=np.int64,
)

### Y: one-hot targets (the token that follows each input window)

In [7]:
# Targets are every token from index `receptive_field` onward, i.e. the token
# immediately after each full-length input window. Copy so the raw sequence is
# never aliased by the target array.
train_y_set_original = train_set_original[receptive_field:].copy()

In [8]:
# Sparse coordinates of the one-hot targets: one [row, token_id] pair per
# target, where `row` is the example number and `token_id` the active column.
train_y_set_indices = [
    [row, token_id] for row, token_id in enumerate(train_y_set_original)
]
train_y_set_indices[:10]

[[0, 1],
 [1, 1],
 [2, 1],
 [3, 1],
 [4, 1],
 [5, 1],
 [6, 1],
 [7, 1],
 [8, 1],
 [9, 1]]

In [9]:
# One-hot targets as a sparse [num_targets, vocab_size] matrix holding a single
# 1 per row, sliced row-by-row into a dataset of sparse [vocab_size] vectors.
train_y = tf.data.Dataset.from_tensor_slices(
    tf.SparseTensor(
        indices=train_y_set_indices,
        values=[1] * len(train_y_set_indices),
        dense_shape=[len(train_y_set_indices), vocab_size],
    )
)

In [10]:
# Number of elements in train_y (one per target token).
train_y.cardinality()

<tf.Tensor: shape=(), dtype=int64, numpy=161034>

In [11]:
# Example: densify the first ten targets to inspect their one-hot rows.
ex = train_y_set_indices[:10]
tf.sparse.to_dense(
    tf.SparseTensor(
        indices=ex,
        values=[1] * len(ex),
        dense_shape=[len(ex), vocab_size],
    )
)

<tf.Tensor: shape=(10, 16293), dtype=int32, numpy=
array([[0, 1, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       ...,
       [0, 1, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0]])>

### X as one-hot windows

In [12]:
# Inputs use every token except the last one, which can only serve as a target.
# Copy so the raw sequence is never aliased by the input array.
train_x_set_original = train_set_original[:-1].copy()

In [15]:
# Build one one-hot input window per target. Window k covers tokens
# [k, k + receptive_field) of train_x_set_original and is stored as a sparse
# tensor of shape [1, receptive_field, vocab_size], so the windows can later be
# concatenated along axis 0.
num_windows = len(train_x_set_original) - receptive_field + 1
if num_windows <= 0:
    # Fail here with a clear message instead of producing an empty list that
    # only breaks later when the windows are concatenated.
    raise ValueError(
        "Need at least receptive_field={} input tokens to build a window, got {}".format(
            receptive_field, len(train_x_set_original)
        )
    )

# Loop-invariant pieces, built once instead of once per window.
window_positions = np.arange(receptive_field, dtype=np.int64)  # row index inside a window
window_values = [1] * receptive_field
window_shape = [receptive_field, vocab_size]

train_x_set = []

# `start` is the window's offset into the sequence; it is kept distinct from the
# in-window position so the two indices cannot be confused.
for start in tqdm.trange(num_windows):
    seq = train_x_set_original[start:start + receptive_field]
    # [position, token_id] coordinates of the single 1 in each one-hot row.
    indices = np.stack([window_positions, seq], axis=1)
    window = tf.SparseTensor(indices=indices, values=window_values, dense_shape=window_shape)
    train_x_set.append(tf.sparse.expand_dims(window, axis=0))

100%|██████████| 161034/161034 [04:23<00:00, 611.99it/s]


In [16]:
# Merge the per-window [1, receptive_field, vocab_size] sparse tensors along the
# batch axis into a single sparse tensor. The name `train_x_set` is rebound from
# a list to that tensor, so only concatenate while it is still the list; this
# keeps the cell safe to re-run.
if isinstance(train_x_set, list):
    train_x_set = tf.sparse.concat(axis=0, sp_inputs=train_x_set)

In [23]:
# Slice the stacked sparse tensor along its first axis: one input window per element.
train_x = tf.data.Dataset.from_tensor_slices(tensors=train_x_set)

In [25]:
# Pair each input window with its target so every element is an (x, y) tuple.
train_set = tf.data.Dataset.zip((train_x, train_y))

In [29]:
# Persist the zipped (x, y) dataset to disk under data/dataset.
tf.data.experimental.save(dataset=train_set, path="data/dataset")

In [31]:
# Read the saved dataset back, reusing the in-memory dataset's element spec to
# describe the structure of the stored elements.
load_data = tf.data.experimental.load(
    path="data/dataset",
    element_spec=train_set.element_spec,
)

In [30]:
# Show the (input, target) structure of train_set; this is the same spec that is
# passed to tf.data.experimental.load when reading the dataset back.
train_set.element_spec

(SparseTensorSpec(TensorShape([257, 16293]), tf.int32),
 SparseTensorSpec(TensorShape([16293]), tf.int32))

In [36]:
# Sanity check: densify and print the first (window, target) pair read back from disk.
for window, target in load_data.take(1):
    print(tf.sparse.to_dense(window))
    print(tf.sparse.to_dense(target))

tf.Tensor(
[[1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 ...
 [0 1 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]], shape=(257, 16293), dtype=int32)
tf.Tensor([0 1 0 ... 0 0 0], shape=(16293,), dtype=int32)
