In [1]:
import datetime
import os
import pickle
import random
from collections import deque
from random import randrange

import numpy as np
import tensorflow as tf
from tensorflow.python.keras.initializers import RandomUniform
from tensorflow.python.keras.layers import Dense, Input, BatchNormalization, Activation, Concatenate

import sys
sys.path.append(r"C:\Study\SpeechAcquisitionModel")

from src.VTL.vtl_environment import VTLEnv


  from ._conv import register_converters as _register_converters


In [2]:
videos_dir = r"C:\Study\SpeechAcquisitionModel\reports\videos"
dt = str(datetime.datetime.now().strftime("%m_%d_%Y_%I_%M_%p"))
os.makedirs(videos_dir + r'\videos_' + dt)
videos_dir = videos_dir + r'\videos_' + dt
summaries_dir = r"C:\Study\SpeechAcquisitionModel\reports\summaries"


In [3]:


class ReplayBuffer(object):

    def __init__(self, buffer_size, random_seed=123):
        """
        The right side of the deque contains the most recent experiences
        """
        self.buffer_size = buffer_size
        self.count = 0
        self.buffer = deque()
        random.seed(random_seed)

    def add(self, s0, g0, a, s1, g1, target):
        if self.count < self.buffer_size:
            self.buffer.append((s0, g0, a, s1, g1, target))
            self.count += 1
        else:
            self.buffer.popleft()
            self.buffer.append((s0, g0, a, s1, g1, target))

    def size(self):
        return self.count

    def sample_batch(self, batch_size):
        batch = []

        if self.count < batch_size:
            batch = random.sample(self.buffer, self.count)
        else:
            batch = random.sample(self.buffer, batch_size)

        s0_batch = np.array([_[0] for _ in batch])
        g0_batch = np.array([_[1] for _ in batch])
        a_batch = np.array([_[2] for _ in batch])
        s1_batch = np.array([_[3] for _ in batch])
        g1_batch = np.array([_[4] for _ in batch])
        target_batch = np.array([_[5] for _ in batch])

        return s0_batch, g0_batch, a_batch, s1_batch, g1_batch, target_batch

    def clear(self):
        self.buffer.clear()
        self.count = 0
# Taken from https://github.com/openai/baselines/blob/master/baselines/ddpg/noise.py, which is
# based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab
class OrnsteinUhlenbeckActionNoise:
    def __init__(self, mu, sigma=0.01, theta=.15, dt=1e-2, x0=None):
        self.theta = theta
        self.mu = mu
        self.sigma = sigma
        self.dt = dt
        self.x0 = x0
        self.reset()

    def __call__(self):
        x = self.x_prev + self.theta * (self.mu - self.x_prev) * self.dt + \
            self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape)
        self.x_prev = x
        return x

    def reset(self):
        self.x_prev = self.x0 if self.x0 is not None else np.zeros_like(self.mu)

    def __repr__(self):
        return 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'.format(self.mu, self.sigma)


In [4]:
class ModelDynamics(object):
    """
    Input to the network is the current state and the goal state, output is the next state.
    """

    def __init__(self, name, model_settings):
        self.sess = tf.get_default_session()
        assert (self.sess is not None)
        self.name = name
        self.s_dim = model_settings['state_dim']
        self.state_bound = model_settings['state_bound']
        self.g_dim = model_settings['goal_dim']
        self.goal_bound = model_settings['goal_bound']
        self.a_dim = model_settings['action_dim']
        self.action_bound = model_settings['action_bound']
        self.learning_rate = model_settings['actor_learning_rate']
        self.tau = model_settings['actor_tau']
        self.batch_size = model_settings['minibatch_size']

        y_max = [y[1] for y in self.state_bound]
        y_min = [y[0] for y in self.state_bound]
        self._k_state = 2. / (np.subtract(y_max, y_min))
        self._b_state = -0.5 * np.add(y_max, y_min) * self._k_state

        y_max = [y[1] for y in self.goal_bound]
        y_min = [y[0] for y in self.goal_bound]
        self._k_goal = 2. / (np.subtract(y_max, y_min))
        self._b_goal = -0.5 * np.add(y_max, y_min) * self._k_goal

        # Model Dynamics Network
        with tf.variable_scope(self.name + '_model_dynamics'):
            self.inputs_state,\
            self.inputs_goal,\
            self.inputs_action,\
            self.state_out,\
            self.scaled_state_out,\
            self.goal_out,\
            self.scaled_goal_out\
                = self.create_model_dynamics_network()
            self.network_params = tf.trainable_variables(scope=self.name + '_model_dynamics')

        # Target Model Dynamics Network
        with tf.variable_scope(self.name + '_target_model_dynamics'):
            self.target_inputs_state,\
            self.target_inputs_goal,\
            self.target_inputs_action,\
            self.target_state_out,\
            self.target_scaled_state_out,\
            self.target_goal_out,\
            self.target_scaled_goal_out\
                = self.create_model_dynamics_network()
            self.target_network_params = tf.trainable_variables(scope=self.name + '_target_model_dynamics')

        # Op for periodically updating target network with online network
        # weights
        self.update_target_network_params = \
            [self.target_network_params[i].assign(tf.multiply(self.network_params[i], self.tau) +
                                                  tf.multiply(self.target_network_params[i], 1. - self.tau))
             for i in range(len(self.target_network_params))]

        self.copy_target_network_params = [self.target_network_params[i].assign(self.network_params[i])
                                    for i in range(len(self.target_network_params))]

        self.ground_truth_state_out = tf.placeholder(tf.float32, [None, self.s_dim])
        self.ground_truth_goal_out = tf.placeholder(tf.float32, [None, self.g_dim])
#         self.ground_truth_out = Concatenate()([self.ground_truth_state_out, self.ground_truth_goal_out])
        self.ground_truth_out = self.ground_truth_state_out

        self.scaled_out = self.scaled_state_out
#         self.scaled_out = Concatenate()([self.scaled_state_out, self.scaled_goal_out])

        # Optimization Op
        self.loss = tf.losses.mean_squared_error(self.ground_truth_out, self.scaled_out)
        self.optimize = tf.train.AdamOptimizer(
            self.learning_rate).minimize(self.loss)

        # Acion gradients extraction
        self.goal_loss = tf.losses.mean_squared_error(self.ground_truth_goal_out, self.scaled_goal_out)
        # self.actor_obj = tf.abs(tf.subtract(self.ground_truth_goal_out, self.scaled_goal_out))
        self.action_grads = tf.gradients(self.goal_loss, self.inputs_action)

        self.num_trainable_vars = len(
            self.network_params) + len(self.target_network_params)

    def create_model_dynamics_network(self):
        state_x = Input(batch_shape=[None, self.s_dim])
        goal_x = Input(batch_shape=[None, self.g_dim])
        action_x = Input(batch_shape=[None, self.a_dim])

        state_net = Dense(256)(state_x)
        state_net = BatchNormalization()(state_net)
        state_net = Activation('relu')(state_net)

        goal_net = Dense(256)(goal_x)
        goal_net = BatchNormalization()(goal_net)
        goal_net = Activation('relu')(goal_net)

        net = Concatenate()([state_net, goal_net])
        net  = state_net
        net = Dense(128, activation='relu')(net)

        action_net = Dense(128, activation='relu')(action_x)

        net = Concatenate()([net, action_net])
        net = Dense(64)(net)
        net = BatchNormalization()(net)
        net = Activation('tanh')(net)

        # state output branch
        state_y = Dense(self.s_dim,
                        activation='tanh',
                        kernel_initializer=RandomUniform(minval=-0.0003, maxval=0.0003)
                        )(net)
        state_y = Dense(self.s_dim)(state_y)
        state_y_scaled = state_y
        # state_y_scaled = tf.subtract(state_y, self._b_state)
        # state_y_scaled = tf.divide(state_y_scaled, self._k_state)

        # goal output branch
        goal_y = Dense(self.g_dim,
                        activation='tanh',
                        kernel_initializer=RandomUniform(minval=-0.0003, maxval=0.0003)
                        )(net)
        goal_y = Dense(self.g_dim)(goal_y)

        goal_y_scaled = goal_y
        # goal_y_scaled = tf.subtract(goal_y, self._b_goal)
        # goal_y_scaled = tf.divide(goal_y_scaled, self._k_goal)
        return state_x, goal_x, action_x, state_y, state_y_scaled, goal_y, goal_y_scaled

    def train(self, inputs_state, inputs_goal, inputs_action, ground_truth_state_out, ground_truth_goal_out):
        return self.sess.run([self.optimize, self.loss, self.goal_loss], feed_dict={
                self.inputs_state: inputs_state,
                self.inputs_action: inputs_action,
                self.inputs_goal: inputs_goal,
                self.ground_truth_state_out: ground_truth_state_out,
                self.ground_truth_goal_out: ground_truth_goal_out,
        })

    def action_gradients(self, inputs_state, inputs_goal, inputs_action, target_goal_out):
        return self.sess.run(self.action_grads, feed_dict={
            self.inputs_state: inputs_state,
            self.inputs_action: inputs_action,
            self.inputs_goal: inputs_goal,
            self.ground_truth_goal_out: target_goal_out
        })

    def predict(self, inputs_state, inputs_goal, inputs_action):
        return self.sess.run([self.scaled_state_out, self.scaled_goal_out], feed_dict={
            self.inputs_state: inputs_state,
            self.inputs_goal: inputs_goal,
            self.inputs_action: inputs_action
        })

    def predict_target(self, inputs_state, inputs_goal, inputs_action):
        return self.sess.run([self.target_scaled_state_out, self.target_scaled_goal_out], feed_dict={
            self.target_inputs_state: inputs_state,
            self.target_inputs_goal: inputs_goal,
            self.target_inputs_action: inputs_action
        })

    def update_target_network(self):
        self.sess.run(self.update_target_network_params)

    def init_target_network(self):
        self.sess.run(self.copy_target_network_params)

    def get_num_trainable_vars(self):
        return self.num_trainable_vars


def build_summaries():
    model_dynamics_loss = tf.Variable(0.)
    tf.summary.scalar("Model dynamics loss", model_dynamics_loss)

    model_dynamics_relative_loss = tf.Variable(0.)
    tf.summary.scalar("Model dynamics relative loss", model_dynamics_relative_loss)

    model_dynamics_goal_loss = tf.Variable(0.)
    tf.summary.scalar("Model dynamics goal loss", model_dynamics_goal_loss)

    actor_grads = tf.placeholder(dtype=tf.float32, shape=None)
    tf.summary.histogram('actor_gradients', actor_grads)

    variables = [v for v in tf.trainable_variables()]
    [tf.summary.histogram(v.name, v) for v in variables]

    summary_vars = [model_dynamics_loss, model_dynamics_relative_loss, model_dynamics_goal_loss ,actor_grads]

    summary_vars.extend(variables)
    summary_ops = tf.summary.merge_all()

    return summary_ops, summary_vars


class real_dynamics(object):
    def __init__(self, settings):
        with tf.variable_scope('real_model_dynamics'):
            self.action_x = Input(batch_shape=[None, settings['action_dim']])
            self.state_x = Input(batch_shape=[None, settings['state_dim']])
            self.ground_truth_goal_out = tf.placeholder(tf.float32, [None, settings['state_dim']])
            self.next_state = tf.add(self.state_x, self.action_x)
            y_max = np.tile([y[1] for y in settings['state_bound']], (settings['minibatch_size'], 1))
            y_min = np.tile([y[0] for y in settings['state_bound']], (settings['minibatch_size'], 1))
            self.next_state = tf.clip_by_value(self.next_state, y_min, y_max)
            self.goal_loss = tf.losses.mean_squared_error(self.ground_truth_goal_out, self.next_state)

            # self.actor_obj = tf.abs(tf.subtract(self.ground_truth_goal_out, self.scaled_goal_out))
            # self.action_grads = tf.gradients(self.goal_loss, self.action_x)
            self.action_grads = \
                tf.gradients(self.goal_loss, self.action_x)

    def action_gradients(self, action, state, ground_truth):
        sess = tf.get_default_session()
        return sess.run([self.action_grads, self.goal_loss], feed_dict={
            self.action_x: action,
            self.state_x: state,
            self.ground_truth_goal_out: ground_truth
        })

In [5]:
def train(settings, model_dynamics, env, replay_buffer, reference_trajectory, noise_sigma):

    # temp
    dm = real_dynamics(settings)

    sess = tf.get_default_session()
    summary_ops, summary_vars = build_summaries()

    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())
    
    writer = tf.summary.FileWriter(settings['summary_dir'] + '/summary_' + dt, sess.graph)


    num_episodes = 10000
    action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(env.action_dim), sigma=noise_sigma)
    s_dim = settings['state_dim']
    g_dim = settings['goal_dim']
    a_dim = settings['action_dim']
    for i in range(num_episodes):
        # pick random initial state from the reference trajectory
        s0_index = randrange(0, reference_trajectory.shape[0] - 1)
        if i % 200 == 0:
            s0_index = 0

        s0 = reference_trajectory[s0_index]
        g0 = s0
        env.reset(s0)

        r = []
        # rollout episode
        for j in range(s0_index, len(reference_trajectory) - 1):
            target = reference_trajectory[j + 1]
            # add noise
            action = action_noise()

            action = np.reshape(action, (a_dim))
            s1 = env.step(action)
            env.render()
            g1 = s1

            s1_expected = s0 + action
            err = np.mean(s1_expected - s1)
            # calc reward
            last_loss = np.linalg.norm(target - g1)

            r.append( -1. * np.linalg.norm(target - g1))
            replay_buffer.add(s0, g0, action, s1, g1, target)
            s0 = s1
            g0 = g1

            if last_loss > 4. and i % 200 != 0:
                break
        if i % 200 == 0:
            
            fname = videos_dir + '/episode_' + str(datetime.datetime.now().strftime("%m_%d_%Y_%I_%M_%p_%S"))
            env.dump_episode(fname)
        # train model_dynamics and policy
        minibatch_size = settings['minibatch_size']
        if replay_buffer.size() > minibatch_size:
            s0_batch, g0_batch, a_batch, s1_batch, g1_batch, target_batch = \
                replay_buffer.sample_batch(minibatch_size)

            # train model_dynamics
            ds = s1_batch - s0_batch
            dg = g1_batch - g0_batch
            
            md_loss = 0
            md_goal_loss = 0
#             _, md_loss, md_goal_loss = model_dynamics.train(s0_batch, g0_batch, a_batch, ds, dg)
            
            ds_pred, dg_pred = model_dynamics.predict(s0_batch, g0_batch, a_batch)
            
            md_loss = np.mean(np.linalg.norm(ds - ds_pred, axis = 1))
            relative_loss = md_loss / np.mean(np.linalg.norm(ds, axis = 1))
            if i % 200 == 0:
                ds_pred, dg_pred = model_dynamics.predict(s0_batch, g0_batch, a_batch)
                print(ds_pred[0] - ds[0])
                print(dg_pred[0] - dg[0])

            summary_str = sess.run(summary_ops, feed_dict={
                summary_vars[0]: md_loss,
                summary_vars[1]: relative_loss,
                summary_vars[2]: md_goal_loss,
                summary_vars[3]: 0
            })

            writer.add_summary(summary_str, i)
            writer.flush()

            print(' Episode: {:d} |'
                  ' Model dynamics loss: {:.4f}|'
                  ' Model dynamics relative loss: {:.4f}|'
                  ' MD goal loss: {:.4f}'.format(i,
                                                 md_loss,
                                                 relative_loss,
                                                 md_goal_loss))


In [6]:
speaker_fname = os.path.join(r'C:\Study\SpeechAcquisitionModel\src\VTL', 'JD2.speaker')
lib_path = os.path.join(r'C:\Study\SpeechAcquisitionModel\src\VTL', 'VocalTractLab2.dll')
ep_duration = 5000
timestep = 20
env = VTLEnv(lib_path, speaker_fname, timestep, max_episode_duration=ep_duration)

settings = {
        'state_dim': env.state_dim,
        'action_dim': env.action_dim,
        'state_bound': env.state_bound,
        'action_bound': env.action_bound,
        'goal_dim': env.state_dim,
        'goal_bound': env.state_bound,
        'episode_length': 40,
        'minibatch_size': 512,

        'actor_tau': 0.01,
        'actor_learning_rate': 0.0001,

        'summary_dir': summaries_dir
    }
noise_sigma = 0.01
with tf.Session() as sess:

    md = ModelDynamics('MD1', settings)
    replay_buffer = ReplayBuffer(100000)

    sess.run(tf.global_variables_initializer())
    reference_fname = r'C:\Study\SpeechAcquisitionModel\src\VTL\references\a_i.pkl'
    with open(reference_fname, 'rb') as f:
        (tract_params, glottis_params) = pickle.load(f)
        target_trajectory = np.hstack((np.array(tract_params), np.array(glottis_params)))
    
    train(settings, md, env, replay_buffer, target_trajectory, noise_sigma)

Compile date of the library: "Jun 15 2018"
INFO:tensorflow:Summary name Model dynamics loss is illegal; using Model_dynamics_loss instead.
INFO:tensorflow:Summary name Model dynamics relative loss is illegal; using Model_dynamics_relative_loss instead.
INFO:tensorflow:Summary name Model dynamics goal loss is illegal; using Model_dynamics_goal_loss instead.
INFO:tensorflow:Summary name MD1_model_dynamics/dense/kernel:0 is illegal; using MD1_model_dynamics/dense/kernel_0 instead.
INFO:tensorflow:Summary name MD1_model_dynamics/dense/bias:0 is illegal; using MD1_model_dynamics/dense/bias_0 instead.
INFO:tensorflow:Summary name MD1_model_dynamics/batch_normalization/gamma:0 is illegal; using MD1_model_dynamics/batch_normalization/gamma_0 instead.
INFO:tensorflow:Summary name MD1_model_dynamics/batch_normalization/beta:0 is illegal; using MD1_model_dynamics/batch_normalization/beta_0 instead.
INFO:tensorflow:Summary name MD1_model_dynamics/dense_1/kernel:0 is illegal; using MD1_model_dynami

bitrate tolerance 128000 too small for bitrate 8000000, overriding
Using AVStream.codec to pass codec parameters to muxers is deprecated, use AVStream.codecpar instead.


KeyboardInterrupt: 