In [1]:
from models.modules import *

In [2]:
import tensorflow as tf
from tensorflow.contrib.rnn import GRUCell, MultiRNNCell, OutputProjectionWrapper, ResidualWrapper
from tensorflow.contrib.seq2seq import BasicDecoder, BahdanauAttention, AttentionWrapper
from text.symbols import symbols
from hparams import hparams, hparams_debug_string

In [3]:
# Hyperparameters: `hp` is a short alias for the shared `hparams` object.
# parse('') is called with an empty override string (presumably leaving the
# defaults untouched -- confirm against hparams).
hp = hparams
hp.parse('')

# Toy sizes used for the static placeholder shapes in this walkthrough.
batch_size, input_len, output_len = 1, 10, 100

# Forwarded to prenet/conv1d below.
is_training = True

In [4]:
# Graph inputs with fully static shapes.
# inputs / input_lengths: int32 symbol ids and per-example lengths.
inputs = tf.placeholder(dtype=tf.int32, shape=[batch_size, input_len], name='inputs')
input_lengths = tf.placeholder(dtype=tf.int32, shape=[batch_size], name='input_lengths')

# Float targets of length output_len with 80 and 1025 features per frame.
mel_targets = tf.placeholder(
    dtype=tf.float32, shape=[batch_size, output_len, 80], name='mel_targets')
linear_targets = tf.placeholder(
    dtype=tf.float32, shape=[batch_size, output_len, 1025], name='linear_targets')

### Step0. Character embedding and prenet

In [5]:
# Trainable embedding matrix: one row of width hp.embed_depth per entry in `symbols`,
# initialised from a truncated normal with stddev 0.5.
embedding_table = tf.get_variable(
    name='embedding',
    shape=[len(symbols), hp.embed_depth],
    dtype=tf.float32,
    initializer=tf.truncated_normal_initializer(stddev=0.5))

# Replace each symbol id in `inputs` with its embedding row.
embedded_inputs = tf.nn.embedding_lookup(params=embedding_table, ids=inputs)

In [6]:
# Run the embedded symbols through the project's `prenet` helper (presumably
# provided by the `models.modules` star import -- confirm). The layer sizes come
# from hp.prenet_depths and is_training is forwarded unchanged.
prenet_outputs = prenet(embedded_inputs, is_training, hp.prenet_depths)

### Step1. Conv1D bank stacking (Conv, Batch)

In [7]:
# Settings for the encoder block assembled in the cells below.
depth = hp.encoder_depth   # halved later to size the highway layers and GRU cells
K = 16                     # the conv bank below is built for k = 1..K

# The second projection maps back to the prenet's last-axis size so that its
# output can be added to prenet_outputs as a residual.
input_channels = prenet_outputs.shape[2]
projections = [128, input_channels]

In [8]:
# One conv1d branch per k in 1..K, all reading the same prenet output.
# Assumes conv1d's positional args are (inputs, kernel_size, channels, activation,
# is_training, scope) -- confirm in models.modules.
conv_set = [
    conv1d(prenet_outputs, width, 128, tf.nn.relu, is_training, 'conv1d_%d' % width)
    for width in range(1, K + 1)
]

In [9]:
# Join the K branch outputs along the last axis.
conv_outputs = tf.concat(values=conv_set, axis=-1)

In [10]:
# Width-2 max pool with stride 1 and 'same' padding over the concatenated bank output.
maxpool_output = tf.layers.max_pooling1d(
    inputs=conv_outputs,
    pool_size=2,
    strides=1,
    padding='same')

In [12]:
# Two stacked conv1d projections on the pooled bank output.
# Assumes conv1d's positional args are (inputs, kernel_size, channels, activation,
# is_training, scope) -- confirm in models.modules. Under that reading:
#   proj_1: width 3, projections[0] channels, ReLU activation
#   proj_2: width 3, projections[1] channels (= input_channels), no activation
proj1_output = conv1d(maxpool_output, 3, projections[0], tf.nn.relu, is_training, 'proj_1')
proj2_output = conv1d(proj1_output, 3, projections[1], None, is_training, 'proj_2')

### Step2. Highway network (High)

In [16]:
# Residual connection: add the second projection's output to the prenet output.
highway_input = proj2_output + prenet_outputs

In [22]:
# Each highway layer and each GRU direction below is sized with half of `depth`,
# so `depth` has to be even.
half_depth = depth // 2
assert depth % 2 == 0, 'encoder and postnet depths must be even.'

# If the residual sum's last-axis size differs from half_depth, insert a dense
# layer that maps it to half_depth before the highway stack.
if highway_input.get_shape()[2] != half_depth:
    highway_input = tf.layers.dense(inputs=highway_input, units=half_depth)

In [23]:
# Chain four highway layers under scopes 'highway_1' .. 'highway_4'; each layer
# consumes the previous layer's output.
# NOTE: `highway_input` is rebound on every iteration, so re-running this cell
# without re-running the cells above starts from the already-transformed tensor.
for i in range(4):
    highway_scope = 'highway_%d' % (i+1)
    highway_input = highwaynet(highway_input, highway_scope, half_depth)

# The final highway output is what the recurrent layer consumes.
rnn_input = highway_input

### Step3. Bidirectional GRU (GRU)

In [25]:
# Bidirectional GRU over the highway output, with one GRUCell of half_depth
# units per direction. `input_lengths` is passed as sequence_length.
outputs, states = tf.nn.bidirectional_dynamic_rnn(
    cell_fw=GRUCell(half_depth),
    cell_bw=GRUCell(half_depth),
    inputs=rnn_input,
    sequence_length=input_lengths,
    dtype=tf.float32)

Instructions for updating:
seq_dim is deprecated, use seq_axis instead
Instructions for updating:
batch_dim is deprecated, use batch_axis instead
