In [2]:
import numpy as np
import os
import json
import tensorflow as tf
import random
import time

from vaegan_rnn import MDNRNN, sample_vae
from utils import PARSER

In [3]:
# Turn on memory growth for every GPU TensorFlow can see, so memory is claimed
# as needed instead of being reserved up front.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpu_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

# Compact, non-scientific array printing for the rest of the notebook.
np.set_printoptions(
    suppress=True,
    linewidth=100,
    edgeitems=6,
    precision=4,
)

In [4]:
# Directory that holds the pre-computed `series.npz` archive loaded by this notebook.
DATA_DIR = "results/WorldModels/CarRacing-v0/vaegan_series"
# Directory that receives the saved RNN model and its `tensorboard` log sub-directory.
model_save_path = "results/WorldModels/CarRacing-v0/vaegan_rnn"

In [6]:
# Create the output directory up front. `exist_ok=True` keeps the cell idempotent on
# re-runs and avoids the check-then-create race of a separate `os.path.exists` test.
os.makedirs(model_save_path, exist_ok=True)
# Disabled: dumping the run arguments needs an `args` object, which is not defined
# anywhere in the visible part of this notebook.
#with open(model_save_path + '/args.json', 'w') as f:
#    json.dump(args.__dict__, f, indent=2)
# Open the series archive; the individual arrays are pulled out of it in the next cell.
raw_data = np.load(os.path.join(DATA_DIR, "series.npz"))

In [7]:
# Read every stored array out of the archive in one pass; the tuple of keys lists
# the names the arrays are stored under, in the same order as the targets.
(data_mu, data_logvar, data_action,
 data_r, data_d, data_N) = (
    raw_data[key]
    for key in ("mu", "logvar", "action", "reward", "done", "N")
)
# Number of entries along the first axis of `mu`. An earlier note here expected 10k;
# the recorded output of the following cell shows 640 for this run.
N_data = len(data_mu)

In [8]:
# Sanity check: show the length of the loaded `mu` array.
print(N_data)

640


In [9]:
def ds_gen(num_batches=4000, batch_size=5, seq_len=1000):
  """Yield randomly sampled training batches built from the loaded series arrays.

  Each batch picks `batch_size` distinct indices into the first axis of the
  module-level `data_*` arrays and keeps only the first `seq_len` entries along
  the second axis. The defaults match the static shapes declared where the
  `tf.data` pipeline is built, so calling `ds_gen()` with no arguments behaves
  exactly as before.

  Args:
    num_batches: how many batches to yield before the generator is exhausted.
    batch_size: number of series drawn (without replacement) per batch.
    seq_len: number of leading steps kept from each drawn series.

  Yields:
    A tuple `(z, action, r, d, N)` where `z` is whatever `sample_vae(mu, logvar)`
    returns (presumably a latent sample drawn from the encoder statistics --
    TODO confirm against `vaegan_rnn.sample_vae`), `action` is the sliced action
    array, and `r`, `d`, `N` are the sliced reward / done / N arrays cast to
    float16.
  """
  for _ in range(num_batches):
    # A fresh permutation per batch; its prefix gives distinct random indices.
    indices = np.random.permutation(N_data)[0:batch_size]
    # suboptimal b/c we are always only taking first set of steps
    mu = data_mu[indices][:, :seq_len]
    logvar = data_logvar[indices][:, :seq_len]
    action = data_action[indices][:, :seq_len]
    z = sample_vae(mu, logvar)
    # Slice before casting so only the steps that are kept get converted.
    r = tf.cast(data_r[indices][:, :seq_len], tf.float16)
    d = tf.cast(data_d[indices][:, :seq_len], tf.float16)
    N = tf.cast(data_N[indices][:, :seq_len], tf.float16)
    yield z, action, r, d, N

In [10]:
# Wrap the batch generator in a tf.data pipeline. All five yielded tensors are
# declared as float16; the static shapes are listed in yield order
# (z, action, reward, done, N). Up to 10 batches are prefetched.
dataset = tf.data.Dataset.from_generator(
    ds_gen,
    output_types=(tf.float16,) * 5,
    output_shapes=(
        (5, 1000, 512),
        (5, 1000, 3),
        (5, 1000, 1),
        (5, 1000, 1),
        (5, 1000, 1),
    ),
).prefetch(10)

# TensorBoard logging: a default summary writer plus a Keras callback, both
# pointed at the same sub-directory of the model output folder.
tensorboard_dir = os.path.join(model_save_path, 'tensorboard')
summary_writer = tf.summary.create_file_writer(tensorboard_dir)
summary_writer.set_as_default()
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    write_graph=False,
    log_dir=tensorboard_dir,
)

In [11]:
# Build the MDN-RNN and compile it with the optimizer and loss that the model
# object itself exposes (`rnn.optimizer`, `rnn.get_loss()`).
rnn = MDNRNN()
rnn.compile(optimizer=rnn.optimizer, loss=rnn.get_loss())
# Attach the model to the TensorBoard callback.
tensorboard_callback.set_model(rnn)

In [12]:
# train loop:
# Learning-rate schedule: lr = (LR_MAX - LR_MIN) * LR_DECAY ** step + LR_MIN.
# With LR_DECAY == 1.0 the rate stays at LR_MAX for the whole run.
LR_MAX = 0.001
LR_MIN = 0.00001
LR_DECAY = 1.0
LOG_EVERY = 20      # print a progress line every this many steps
SAVE_EVERY = 1000   # write the model to disk every this many steps

start = time.time()
step = 0
last_saved_step = None  # index of the most recent step whose weights were saved
for raw_z, raw_a, raw_r, raw_d, raw_N in dataset:
    curr_learning_rate = (LR_MAX - LR_MIN) * LR_DECAY ** step + LR_MIN
    rnn.optimizer.learning_rate = curr_learning_rate

    # The RNN input at each step is the latent vector concatenated with the action.
    inputs = tf.concat([raw_z, raw_a], axis=2)

    if step == 0:
        # Private Keras API; presumably needed so the subclassed model knows its
        # input signature before training/saving -- TODO confirm.
        rnn._set_inputs(inputs)

    # Target for step t is the latent at step t+1 (shift left by one along time).
    dummy_zero = tf.zeros([raw_z.shape[0], 1, raw_z.shape[2]], dtype=tf.float16)
    z_targ = tf.concat([raw_z[:, 1:, :], dummy_zero], axis=1) # zero pad the end but we don't actually use it
    z_mask = 1.0 - raw_d
    z_targ = tf.concat([z_targ, z_mask], axis=2) # use a signal to not pass grad

    outputs = {'MDN': z_targ}
    # Disabled: optional reward / done prediction targets. They depend on an `args`
    # object that is not defined in the visible part of this notebook.
    # if args.rnn_r_pred == 1:
    #     r_mask = tf.concat([tf.ones([5, 1, 1], dtype=tf.float16), 1.0 - raw_d[:, :-1, :]], axis=1)
    #     r_targ = tf.concat([raw_r, r_mask], axis=2)
    #     outputs['r'] = r_targ
    # if args.rnn_d_pred == 1:
    #     d_mask = tf.concat([tf.ones([5, 1, 1], dtype=tf.float16), 1.0 - raw_d[:, :-1, :]], axis=1)
    #     d_targ = tf.concat([raw_d, d_mask], axis=2)
    #     outputs['d'] = d_targ
    loss = rnn.train_on_batch(x=inputs, y=outputs, return_dict=True)
    # Log every returned loss/metric to the default summary writer.
    for loss_key, loss_val in loss.items():
        tf.summary.scalar(loss_key, loss_val, step=step)

    if (step % LOG_EVERY == 0 and step > 0):
        end = time.time()
        time_taken = end-start
        start = time.time()
        output_log = "step: %d, train_time_taken: %.4f, lr: %.6f" % (step, time_taken, curr_learning_rate)
        for loss_key, loss_val in loss.items():
            output_log += ', {}: {:.4f}'.format(loss_key, loss_val)
        print(output_log)
    if (step % SAVE_EVERY == 0 and step > 0):
        tf.keras.models.save_model(rnn, model_save_path, include_optimizer=True, save_format='tf')
        last_saved_step = step
    step += 1

# Persist the final weights as well: the periodic save only fires on multiples of
# SAVE_EVERY, so without this the steps trained after the last checkpoint would be lost.
if step > 0 and last_saved_step != step - 1:
    tf.keras.models.save_model(rnn, model_save_path, include_optimizer=True, save_format='tf')
    last_saved_step = step - 1

Instructions for updating:
`scale_identity_multiplier` is deprecated; please combine it with `scale_diag` directly instead.
Instructions for updating:
Do not pass `graph_parents`.  They will  no longer be used.


To change all layers to have dtype float16 by default, call `tf.keras.backend.set_floatx('float16')`. To change just this layer, pass dtype='float16' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

step: 20, train_time_taken: 7.6290, lr: 0.001000, loss: 1.1854
step: 40, train_time_taken: 2.1516, lr: 0.001000, loss: 1.0582
step: 60, train_time_taken: 2.1479, lr: 0.001000, loss: 0.9677
step: 80, train_time_taken: 2.1566, lr: 0.001000, loss: 0.9312
step: 100, train_time_taken: 2.1469, lr: 0.001000, loss: 0.9794
step: 120, train_time_taken: 2.1395, lr: 0.001000, loss: 0.9602
step: 140, train_time_taken: 2.1512, lr: 0.001000, loss: 0.8953
step: 160, train_time_taken: 2.1357, lr: 0.0

step: 1960, train_time_taken: 2.1597, lr: 0.001000, loss: 0.7476
step: 1980, train_time_taken: 2.1624, lr: 0.001000, loss: 0.7546
step: 2000, train_time_taken: 2.1552, lr: 0.001000, loss: 0.7834
INFO:tensorflow:Assets written to: results/WorldModels/CarRacing-v0/vaegan_rnn/assets
step: 2020, train_time_taken: 5.7942, lr: 0.001000, loss: 0.8443
step: 2040, train_time_taken: 2.1621, lr: 0.001000, loss: 0.7424
step: 2060, train_time_taken: 2.1543, lr: 0.001000, loss: 0.8144
step: 2080, train_time_taken: 2.1712, lr: 0.001000, loss: 0.7553
step: 2100, train_time_taken: 2.1556, lr: 0.001000, loss: 0.8312
step: 2120, train_time_taken: 2.1446, lr: 0.001000, loss: 0.7023
step: 2140, train_time_taken: 2.1560, lr: 0.001000, loss: 0.7177
step: 2160, train_time_taken: 2.1521, lr: 0.001000, loss: 0.7916
step: 2180, train_time_taken: 2.1615, lr: 0.001000, loss: 0.7594
step: 2200, train_time_taken: 2.1583, lr: 0.001000, loss: 0.7426
step: 2220, train_time_taken: 2.1523, lr: 0.001000, loss: 0.7855
step