In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import os
import json
import datetime

In [2]:
# Let TensorFlow grow its GPU memory usage on demand instead of reserving the
# whole device up front. Looping over the device list keeps this cell runnable
# on CPU-only machines (the list is empty there) and applies the same setting
# to every GPU on multi-GPU hosts.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

In [3]:
# Prefix of the CSV files read below: data/<dataset_name>_train_set.csv and data/<dataset_name>_val_set.csv.
dataset_name = "SEG_Wavenet"

In [4]:
# Timestamp of this run; it names the folder that receives the saved model,
# the training history and the dilation config further down.
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# exist_ok=True makes the creation idempotent and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs("version", exist_ok=True)
version_dir = "version/" + timestamp

timestamp

'20201208-060233'

In [5]:
# Hyper-parameters shared by the data pipeline and the model.
param_list = {
    "BATCH_SIZE": 8,
    # Two stacked cycles of dilations 1, 2, 4, ..., 512.
    "DILATIONS": [2 ** exponent for exponent in range(10)] * 2,
    "FILTER_WIDTH": 2,                                  # == kernel_size
}

# Receptive field for one-hot input: every layer adds (FILTER_WIDTH - 1) * dilation
# steps, plus FILTER_WIDTH for the initial convolution.
param_list["RECEPTIVE_FIELD"] = (
    (param_list["FILTER_WIDTH"] - 1) * sum(param_list["DILATIONS"])
    + param_list["FILTER_WIDTH"]
)

param_list.update({
    "DILATION_CHANNELS": 32,
    "RESIDUAL_CHANNELS": 24,
    "SKIP_CHANNELS": 128,
    "OUT_CHANNELS": 16293,                              # == vocab_size
    "USE_BIASES": False,
    "BUFFER_SIZE": 200000,
    "SHUFFLE_SEED": 102,
})

# Unused scalar-input variant, kept for reference:
#   quantization_channels = 2**8
#   out_channels = 10*3
#   initial_filter_width = 32
#   receptive_field = sum(dilation) + initial_filter_width

In [6]:
# Display the derived receptive field; it is used below as the length of every input window.
param_list["RECEPTIVE_FIELD"]

2048

In [7]:
# Read the training sequence as a 64-bit integer array
# (presumably one token id per line -- confirm against the CSV).
train_set = np.genfromtxt(
    "data/" + dataset_name + "_train_set.csv",
    delimiter="\n",
    dtype=np.int64,
)

In [8]:
# Model inputs: every run of RECEPTIVE_FIELD consecutive tokens (shift 1).
# The last token is excluded so that each window has a token following it.
x_train = (
    tf.data.Dataset.from_tensor_slices(train_set[:-1])
    .window(param_list["RECEPTIVE_FIELD"], shift=1, stride=1, drop_remainder=True)
    # Each window is itself a dataset; batching it by its own length yields one tensor.
    .flat_map(lambda token_window: token_window.batch(param_list["RECEPTIVE_FIELD"]))
    .batch(param_list["BATCH_SIZE"])
)

In [9]:
# Targets: the token that follows each input window. The window(1) + batch(1)
# pair gives every target a length-1 leading axis before the final batching.
y_train = (
    tf.data.Dataset.from_tensor_slices(train_set[param_list["RECEPTIVE_FIELD"]:])
    .window(1, shift=1, stride=1, drop_remainder=True)
    .flat_map(lambda target_window: target_window.batch(1))
    .batch(param_list["BATCH_SIZE"])
)

In [10]:
# Pair inputs with targets and shuffle. x_train / y_train are already batched,
# so the shuffle reorders whole batches, not individual windows.
# prefetch() only needs a few batches in flight: passing BUFFER_SIZE here asked
# tf.data to buffer up to 200000 batches in memory, so the runtime now picks the
# depth itself (AUTOTUNE). This does not change the order or content of batches.
train_data = (
    tf.data.Dataset.zip((x_train, y_train))
    .shuffle(
        param_list["BUFFER_SIZE"],
        seed=param_list["SHUFFLE_SEED"],
        reshuffle_each_iteration=True,
    )
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [11]:
# Read the validation sequence the same way as the training sequence
# (presumably one token id per line -- confirm against the CSV).
val_set = np.genfromtxt(
    "data/" + dataset_name + "_val_set.csv",
    delimiter="\n",
    dtype=np.int64,
)

In [12]:
# Validation inputs: sliding windows of RECEPTIVE_FIELD tokens (shift 1),
# excluding the last token so that each window has a token following it.
x_val = (
    tf.data.Dataset.from_tensor_slices(val_set[:-1])
    .window(param_list["RECEPTIVE_FIELD"], shift=1, stride=1, drop_remainder=True)
    # Each window is itself a dataset; batching it by its own length yields one tensor.
    .flat_map(lambda token_window: token_window.batch(param_list["RECEPTIVE_FIELD"]))
    .batch(param_list["BATCH_SIZE"])
)

In [13]:
# Validation targets: the token that follows each validation window,
# each with a length-1 leading axis before the final batching.
y_val = (
    tf.data.Dataset.from_tensor_slices(val_set[param_list["RECEPTIVE_FIELD"]:])
    .window(1, shift=1, stride=1, drop_remainder=True)
    .flat_map(lambda target_window: target_window.batch(1))
    .batch(param_list["BATCH_SIZE"])
)

In [14]:
# Pair each batch of validation windows with its batch of targets (no shuffling for validation).
val_data = tf.data.Dataset.zip((x_val, y_val))

In [15]:
class Conv1D(keras.layers.Conv1D):
    """Keras ``Conv1D`` (causal by default) with an extra queue-based single-step path.

    With ``training`` false the layer behaves exactly like the stock Keras
    convolution. With ``training`` true and ``kernel_size > 1`` it keeps a queue
    of the most recent input steps and computes only the newest output step as a
    matrix product of the dilated taps with the flattened kernel.

    Args:
        filters: number of output channels.
        kernel_size: integer kernel width.
        strides: convolution stride, forwarded to Keras.
        padding: Keras padding mode; defaults to ``"causal"``.
        dilation_rate: integer dilation.
        use_bias: whether a bias is created and added.
        *args: accepted for signature compatibility and ignored; the positional
            order of this signature differs from the Keras one, so they cannot
            be forwarded safely.
        **kwargs: forwarded to ``keras.layers.Conv1D`` (e.g. ``name``).
    """

    def __init__(self, filters, kernel_size, strides=1, padding="causal", dilation_rate=1, use_bias=False, *args, **kwargs):
        super().__init__(
            filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            dilation_rate=dilation_rate,
            use_bias=use_bias,
            **kwargs
        )

        ## Keras keeps kernel_size / dilation_rate on the layer as tuples, so the
        ## plain integers are stored under separate names and used for all
        ## arithmetic and comparisons below.
        self.k = kernel_size
        self.d = dilation_rate

        # Populated in build() once the number of input channels is known.
        self.current_receptive_field = None
        self.residual_channels = None
        self.queue = None

        if kernel_size > 1:
            # Number of input steps spanned by the dilated kernel (== queue length).
            self.current_receptive_field = kernel_size + (kernel_size - 1) * (dilation_rate - 1)

    @property
    def linearized_weights(self):
        """Kernel flattened to ``(kernel_size * in_channels, filters)`` as float32.

        Derived from ``self.kernel`` on every access so it always reflects the
        current weights rather than a snapshot taken at build time.
        """
        return tf.cast(tf.reshape(self.kernel, [-1, self.filters]), dtype=tf.float32)

    def build(self, x_shape):
        """Create the Keras weights and, for ``kernel_size > 1``, the input queue."""
        super().build(x_shape)

        if self.k > 1:
            # The queue buffers *inputs*, so its channel axis must match the
            # input channel count (last axis of x_shape), not `filters`.
            self.residual_channels = int(x_shape[-1])
            self.queue = tf.zeros([1, self.current_receptive_field, self.residual_channels])

    def call(self, x, training=False):
        """Run the convolution.

        Returns the regular Keras convolution output unless ``training`` is true
        and ``kernel_size > 1``; in that case returns a tensor reshaped to
        ``[-1, 1, filters]`` computed from the queue.
        """
        # NOTE(review): the queue path handles a single sequence and a single
        # time step, which looks like an incremental-generation path, yet it is
        # selected when training=True. Confirm the flag is not inverted.
        if not training or self.k <= 1:
            # A width-1 kernel needs no history; the regular convolution is used
            # so the layer never returns None.
            return super().call(x)

        # Slide the queue: drop the oldest step, append the newest input step.
        newest_step = tf.expand_dims(x[:, -1, :], axis=1)
        self.queue = tf.concat([self.queue[:, 1:, :], newest_step], axis=1)

        # Keep only the steps the dilated kernel actually looks at.
        if self.d > 1:
            taps = self.queue[:, 0::self.d, :]
        else:
            taps = self.queue

        outputs = tf.matmul(tf.reshape(taps, [1, -1]), self.linearized_weights)

        if self.use_bias:
            outputs = tf.nn.bias_add(outputs, self.bias)

        return tf.reshape(outputs, [-1, 1, self.filters])

In [16]:
class ResidualBlock(keras.Model):
    """One gated, dilated convolution layer with a skip output and a residual output.

    ``call`` returns ``(skip_output, dense_output)``:
      * ``skip_output`` -- 1x1 convolution of the trailing time steps of the gated activation.
      * ``dense_output`` -- 1x1 convolution of the gated activation added to the
        input, after the input is cropped at the front to the same length.
    """

    def __init__(self, layer_index, dilation, filter_width, dilation_channels, residual_channels, skip_channels, use_biases, output_width):
        super().__init__()

        # Only the hyper-parameters are recorded here; the sub-layers are created in build().
        self.layer_index = layer_index
        self.dilation = dilation
        self.filter_width = filter_width
        self.dilation_channels = dilation_channels
        self.residual_channels = residual_channels
        self.skip_channels = skip_channels
        self.use_biases = use_biases
        self.output_width = output_width

    def build(self, input_shape):
        name_prefix = "residual_block_{}/".format(self.layer_index)

        # The filter and gate convolutions share every setting except their name.
        dilated_conv_args = dict(
            filters=self.dilation_channels,
            kernel_size=self.filter_width,
            dilation_rate=self.dilation,
            padding='valid',
            use_bias=self.use_biases,
        )
        self.conv_filter = keras.layers.Conv1D(name=name_prefix + "conv_filter", **dilated_conv_args)
        self.conv_gate = keras.layers.Conv1D(name=name_prefix + "conv_gate", **dilated_conv_args)

        # 1x1 projections of the gated activation (gate * filter).
        pointwise_args = dict(kernel_size=1, padding="same", use_bias=self.use_biases)
        ## conv_residual produces the "transformed" tensor that is added back to the input
        self.conv_residual = keras.layers.Conv1D(
            filters=self.residual_channels, name=name_prefix + "dense", **pointwise_args
        )
        self.conv_skip = keras.layers.Conv1D(
            filters=self.skip_channels, name=name_prefix + "skip", **pointwise_args
        )

    @tf.function
    def call(self, inputs, training=False):
        gated = tf.tanh(self.conv_filter(inputs)) * tf.sigmoid(self.conv_gate(inputs))

        # Number of trailing time steps that feed the skip connection.
        if training:
            kept_steps = self.output_width
        else:
            kept_steps = 1
        skip_start = tf.shape(gated)[1] - kept_steps

        skip_input = tf.slice(gated, [0, skip_start, 0], [-1, -1, self.dilation_channels])
        skip_output = self.conv_skip(skip_input)

        transformed = self.conv_residual(gated)
        # The 'valid' dilated convolution shortens the sequence, so the input is
        # cropped at the front to the same length before the residual addition.
        front_crop = tf.shape(inputs)[1] - tf.shape(transformed)[1]
        dense_output = tf.slice(inputs, [0, front_crop, 0], [-1, -1, -1]) + transformed

        return skip_output, dense_output

In [17]:
class PostProcessing(keras.Model):
    """Output head: ReLU -> 1x1 conv (skip_channels) -> ReLU -> 1x1 conv (out_channels)."""

    def __init__(self, skip_channels, out_channels, use_biases):
        super().__init__()

        self.skip_channels = skip_channels
        self.out_channels = out_channels        # out_channels == quantization_channels
        self.use_biases = use_biases

    def build(self, input_shape):
        # Both convolutions are pointwise and differ only in width and name.
        pointwise_args = dict(kernel_size=1, padding="same", use_bias=self.use_biases)

        self.conv_1 = keras.layers.Conv1D(
            filters=self.skip_channels, name="postprocessing/conv_1", **pointwise_args
        )
        self.conv_2 = keras.layers.Conv1D(
            filters=self.out_channels, name="postprocessing/conv_2", **pointwise_args
        )

    @tf.function
    def call(self, inputs, training=False):
        # `training` is accepted for the Keras call signature and is not used here.
        hidden = self.conv_1(tf.nn.relu(inputs))
        return self.conv_2(tf.nn.relu(hidden))

In [18]:
class WaveNet(keras.Model):
    """WaveNet-style stack of residual blocks over one-hot encoded integer input.

    The forward pass one-hot encodes the integer inputs, applies a preprocessing
    convolution, runs one ResidualBlock per entry of ``dilations``, sums the
    blocks' skip outputs and feeds the sum to PostProcessing. No softmax is
    applied to the result.

    Args:
        batch_size: stored on the instance; not read by any method of this class.
        dilations: one dilation rate per residual block.
        filter_width: kernel size of the preprocessing and dilated convolutions.
        dilation_channels: filters of each block's filter/gate convolutions.
        residual_channels: filters of the preprocessing convolution and of each
            block's residual projection.
        skip_channels: filters of each block's skip projection.
        out_channels: one-hot depth of inputs/targets and width of the final
            output; it is passed to ``tf.one_hot`` as the depth, so the default
            of ``None`` has to be replaced by a real value before calling the model.
        use_biases: whether the convolutions use a bias term.
    """

    def __init__(self, batch_size, dilations, filter_width, dilation_channels, residual_channels, skip_channels, out_channels=None, use_biases=False):
        super().__init__()

        self.batch_size = batch_size
        self.dilations = dilations
        self.filter_width = filter_width
        #self.initial_filter_width = initial_filter_width       # Scalar Input
        self.dilation_channels = dilation_channels
        self.residual_channels = residual_channels
        self.skip_channels = skip_channels
        # quantization_channels == out_channels
        self.out_channels = out_channels             # Same as vocab_size in encoder-decoder
        self.use_biases = use_biases

        # Scalar Input receptive field
        #self.receptive_field = (self.filter_width - 1) * sum(self.dilations) + self.initial_filter_width

        # Onehot Input Receptive Field
        self.receptive_field = (self.filter_width - 1) * sum(self.dilations) + self.filter_width

    def build(self, input_shape):
        """Create the preprocessing conv, the residual blocks and the output head.

        ``input_shape`` is not used: per the original author's note it could not
        be retrieved when feeding a tf.data.Dataset, so the output width is
        fixed at 1 instead of being derived from the input length.
        """
        #self.output_width = input_shape[1] - self.receptive_field + 1       # total output width of model
        
        self.output_width = 1

        self.preprocessing_layer = keras.layers.Conv1D(
            filters=self.residual_channels,
            #kernel_size=self.initial_filter_width,     # Scalar Input
            kernel_size=self.filter_width,
            use_bias=self.use_biases,
            name="preprocessing/conv")

        # One residual block per dilation, in the order given by self.dilations.
        self.residual_blocks = []
        for i, dilation in enumerate(self.dilations):
            self.residual_blocks.append(
                ResidualBlock(
                    layer_index=i,
                    dilation=dilation, 
                    filter_width=self.filter_width, 
                    dilation_channels=self.dilation_channels, 
                    residual_channels=self.residual_channels, 
                    skip_channels=self.skip_channels, 
                    use_biases=self.use_biases, 
                    output_width=self.output_width)
                )

        self.postprocessing_layer = PostProcessing(self.skip_channels, self.out_channels, self.use_biases)

    @tf.function(experimental_relax_shapes=True)
    def call(self, inputs, training=False):
        """Forward pass: integer inputs in, un-normalized output scores out."""
        #inputs = tf.sparse.to_dense(inputs)     # x from onehot dataset
        inputs = tf.one_hot(inputs, self.out_channels, axis=-1)
        
        x = self.preprocessing_layer(inputs)
        skip_outputs = []
        
        # Each block returns its skip contribution and the tensor fed to the next block.
        for layer_index in range(len(self.dilations)):
            skip_output, x = self.residual_blocks[layer_index](x, training=training)
            skip_outputs.append(skip_output)
            
        # Element-wise sum of all skip contributions.
        skip_sum = tf.math.add_n(skip_outputs)
        
        output = self.postprocessing_layer(skip_sum)
        
        #out = tf.reshape(output, [self.batch_size, -1, self.out_channels])
        #output = sample_from_discretized_mix_logistic(out)             # Generative

        # Softmax is intentionally left disabled here; the raw scores are returned.
        #if not training:
        #    output = tf.nn.softmax(tf.cast(output, tf.float64))
        
        return output

    def train_step(self, data): 
        """Run one optimization step on an ``(x, y)`` batch and return the metric results.

        Targets are one-hot encoded to ``out_channels`` classes before the
        compiled loss and metrics are evaluated.
        """
        x, y = data
        y = tf.one_hot(y, self.out_channels, axis=-1)        
        #y = tf.expand_dims(tf.sparse.to_dense(y), axis=1)      # y from onehot dataset

        # Record the forward pass and the loss so gradients can be taken afterwards.
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred)
            #reduced_loss = tf.math.reduce_mean(loss)
            
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(y, y_pred)

        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        """Evaluate one ``(x, y)`` batch without updating weights and return the metric results."""
        x, y = data
        y = tf.one_hot(y, self.out_channels, axis=-1)

        y_pred = self(x, training=False)

        # The returned loss value is not used directly; results are read from self.metrics below.
        loss = self.compiled_loss(y, y_pred)
        self.compiled_metrics.update_state(y, y_pred)

        return {m.name: m.result() for m in self.metrics}

In [19]:
# Build the model from the shared hyper-parameter dictionary.
wavenet = WaveNet(
    batch_size=param_list["BATCH_SIZE"],
    dilations=param_list["DILATIONS"],
    filter_width=param_list["FILTER_WIDTH"],
    dilation_channels=param_list["DILATION_CHANNELS"],
    residual_channels=param_list["RESIDUAL_CHANNELS"],
    skip_channels=param_list["SKIP_CHANNELS"],
    out_channels=param_list["OUT_CHANNELS"],
    # Pass USE_BIASES explicitly so the configured value, not the constructor
    # default, decides whether the convolutions use a bias.
    use_biases=param_list["USE_BIASES"],
)

In [20]:
# The model returns un-normalized scores, hence from_logits=True on the loss.
wavenet.compile(
    optimizer=keras.optimizers.Nadam(),
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

In [21]:
# The epoch count is only an upper bound: training stops once val_loss has not
# improved for 3 epochs, and the best weights seen are restored.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True,
)

history = wavenet.fit(
    train_data,
    epochs=10000,
    validation_data=val_data,
    callbacks=[early_stopping],
)

Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000


In [22]:
# Save the trained model into this run's timestamped folder (version/<timestamp>).
wavenet.save(version_dir)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: version/20201208-060233\assets


In [23]:
# One row per epoch, one column per tracked loss / metric.
train_history = pd.DataFrame(history.history)
train_history

Unnamed: 0,loss,categorical_accuracy,val_loss,val_categorical_accuracy
0,5.754943,0.394284,6.494298,0.234984
1,4.61586,0.41507,5.325154,0.249771
2,3.798453,0.437847,4.519756,0.295859
3,3.212161,0.481189,4.012517,0.359268
4,2.782216,0.534366,3.609171,0.436159
5,2.736429,0.561971,3.425898,0.481515
6,2.303215,0.607593,3.260771,0.531052
7,2.125237,0.636154,3.177567,0.549236
8,1.988255,0.657021,3.119489,0.579282
9,1.865956,0.67625,3.040307,0.594775


In [24]:
# Store the per-epoch history in this run's folder.
train_history.to_csv("version/" + timestamp + "/train_history.csv", index=False)

In [25]:
# Record the dilation schedule used for this run in the same folder.
dilation_config = {"DILATIONS": param_list["DILATIONS"]}
with open("version/" + timestamp + "/dilations.json", "w") as config_file:
    json.dump(dilation_config, config_file, indent=4)