## Connect to Local Runtime
Start a local Jupyter server that Colab is allowed to connect to:

```bash
jupyter notebook --NotebookApp.allow_origin='https://colab.research.google.com' --port=8888 --NotebookApp.port_retries=0
```

In [1]:
import tensorflow as tf
import tensorflow_io as tfio
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow_addons import layers as addon_layers

In [3]:
# Record the TensorFlow version this notebook runs against.
print(tf.__version__)
# NOTE(review): `addon_layers` is the `tensorflow_addons.layers` submodule; the version string
# presumably lives on the top-level `tensorflow_addons` package instead — confirm before enabling.
# print(addon_layers.__version__)

2.7.0


## DataSet

In [4]:
# Glob pattern for the local LJSpeech-1.1 wav files (a relative path, so it is
# resolved against the directory the notebook server was started from).
DATA_PATH = "GENERATIVE_DATA/LJSpeech-1.1/wavs/*.wav"

In [6]:
# Collect the paths of every wav file matching DATA_PATH (13100 speech clips in the
# run recorded below). Only file paths are gathered here; no audio is decoded and the
# dataset's annotations are ignored.
wavs = tf.io.gfile.glob(DATA_PATH)
print(f"Number of audio files: {len(wavs)}")

Number of audio files: 13100


## Model Structure

#### Dilated Residual Block

In [7]:
def residual_stack(input, filters):
    """Build a three-stage dilated residual stack of weight-normalized Conv1D layers.

    Each stage runs two kernel-size-3 convolutions with "same" padding and adds
    the result back onto the stage's own input. The first convolution of the
    three stages uses dilation rate 1, 3 and 9 respectively; the second
    convolution always uses dilation rate 1. Stages two and three pass their
    input through a LeakyReLU before the first convolution, whereas stage one
    convolves `input` directly.

    A dilated convolution lets the network operate on a coarser scale than a
    regular convolution, similar to pooling or striding, but with "same"
    padding the output keeps the length of the input. The skip additions
    require `filters` to equal the channel count of `input`.

    Args:
        input: tensor fed into the stack; either a symbolic Keras tensor or a
            concrete tensor (presumably laid out as (batch, T, C) — confirm
            against callers).
        filters: int, number of output filters of every Conv1D in the stack.

    Returns:
        The tensor produced by the final residual addition.
    """

    def wn_conv(dilation):
        # Fresh weight-normalized Conv1D: kernel size 3, length-preserving padding.
        return addon_layers.WeightNormalization(
            layers.Conv1D(filters, 3, dilation_rate=dilation, padding="same"),
            data_init=False,
        )

    # Stage 1 (dilation 1): no activation in front of the first convolution.
    hidden = wn_conv(1)(input)
    hidden = layers.LeakyReLU()(hidden)
    hidden = wn_conv(1)(hidden)
    skip1 = layers.Add()([hidden, input])

    # Stage 2 (dilation 3).
    hidden = layers.LeakyReLU()(skip1)
    hidden = wn_conv(3)(hidden)
    hidden = layers.LeakyReLU()(hidden)
    hidden = wn_conv(1)(hidden)
    skip2 = layers.Add()([skip1, hidden])

    # Stage 3 (dilation 9).
    hidden = layers.LeakyReLU()(skip2)
    hidden = wn_conv(9)(hidden)
    hidden = layers.LeakyReLU()(hidden)
    hidden = wn_conv(1)(hidden)

    return layers.Add()([hidden, skip2])

In [15]:
# Symbolic smoke test: wrap the residual stack in a Keras model and list its layers.
test_in_shape = (128, 4)  # (T, C) of one sample; the batch axis is left out
# Filter count of every convolution; it has to equal C so the residual additions line up.
test_fiters = 4
test_in = keras.Input(shape=test_in_shape)
test_out = residual_stack(input=test_in, filters=test_fiters)
test_model = keras.Model(inputs=test_in, outputs=test_out)

test_model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 128, 4)]     0           []                               
                                                                                                  
 weight_normalization_24 (Weigh  (None, 128, 4)      57          ['input_5[0][0]']                
 tNormalization)                                                                                  
                                                                                                  
 leaky_re_lu_18 (LeakyReLU)     (None, 128, 4)       0           ['weight_normalization_24[0][0]']
                                                                                                  
 weight_normalization_25 (Weigh  (None, 128, 4)      57          ['leaky_re_lu_18[0][0]']   

In [17]:
# Eager smoke test on a random batch of 32 samples, each of shape (128, 4).
test_sample = tf.random.normal(shape=[32, 128, 4])
test_res_out = residual_stack(input=test_sample, filters=test_fiters)
# "same" padding keeps the time axis and filters == channels keeps the last axis,
# so the output shape equals the input shape.
test_res_out.shape

TensorShape([32, 128, 4])