In [None]:
import time

import tensorflow as tf
import tensorflow.keras.backend as K
import numpy as np
import os
from utils import preprocess_images

In [None]:
from models import V as buildVision
from models import V_inverse as Decoder
from models import M as buildMemory
from models import Controller

In [None]:
import mdn

In [None]:
import matplotlib.pyplot as plt
from IPython import display

In [None]:
# Instantiate the vision model (`models.V`, imported as buildVision) and its
# inverse/decoder counterpart (`models.V_inverse`, imported as Decoder).
V = buildVision()
V_inv = Decoder()

In [None]:
# Restore saved weights for both halves of the vision model from the
# 2019.12.07 weights directory (paths are relative to the working directory).
V.load_weights('weights/2019.12.07/encoder_weights')
V_inv.load_weights('weights/2019.12.07/decoder_weights')

In [None]:
# Print the summary of the vision model V.
V.summary()

In [None]:
# Print the summary of the decoder V_inv.
V_inv.summary()

In [None]:
# Build the memory model via `models.M`, passing the path of the MDN-RNN weights
# (presumably loaded inside the builder -- confirm against models.py).
M = buildMemory('weights/2019.12.07/mdn_rnn_weights')
# Backend function mapping the input of M's first layer to that layer's output.
# `rollout` uses its (squeezed) result as the hidden vector `h`.
# NOTE(review): assumes M.layers[0] is the recurrent layer -- confirm.
get_hidden = K.function(M.layers[0].input, M.layers[0].output)

In [None]:
# Print the summary of the memory model M.
M.summary()

In [None]:
# Controller constructed with (32+256, 3): 32 + 256 = 288 matches the size of the
# concatenated [z, h] vector fed to it in `rollout`, and 3 matches the R^288 -> R^3
# mapping stated in the markdown cell below.
controller = Controller(32+256, 3)
# Load the controller parameters from a NumPy file (path relative to the working directory).
controller.set_weights(np.load('./weights/C_weights.npy'))

$$\text{Controller}: \mathbb R^{288} \rightarrow \mathbb R^3 $$

In [None]:
# Display the controller's `shape` attribute as the cell output.
controller.shape

In [None]:
import gym

In [None]:
# Create the CarRacing-v0 Gym environment; `rollout` uses this module-level `env`.
env = gym.make("CarRacing-v0")

In [None]:
# Smoke test: reset the environment once, run the first observation through
# `preprocess_images`, then close the environment again.
state = preprocess_images(env.reset())
env.close()

In [None]:
def rollout(controller, playback=False):
    """Run one episode in the module-level `env`, driven by `controller`.

    At every step the current preprocessed frame is encoded by `V`, the
    resulting latent `z` is concatenated with the memory hidden vector `h`,
    and `controller` turns that combined vector into the action passed to
    `env.step`. The episode ends when the environment reports `done`.

    Relies on the notebook globals `env`, `V`, `M`, `get_hidden`,
    `preprocess_images`, `mdn` and `tf`.

    Parameters
    ----------
    controller : callable
        Called as ``controller(combined)`` where ``combined`` is the
        concatenation of `z` and `h`; its result is passed to ``env.step``.
    playback : bool, default False
        When True, every preprocessed frame fed to `V` is recorded.

    Returns
    -------
    cumulative_reward
        Sum of the per-step rewards, when ``playback`` is False.
    (cumulative_reward, ims)
        The reward plus the list of recorded frames, when ``playback`` is True.
    """
    ims = []
    cumulative_reward = 0

    # V is called on a (128, 64, 64, 3) batch of which only slot 0 carries the
    # current frame (presumably V was built with a fixed batch size -- confirm).
    # The buffer is loop-invariant apart from slot 0, so allocate it once
    # instead of once per environment step.
    frame_batch = np.zeros((128, 64, 64, 3))

    try:
        state = preprocess_images(env.reset())

        M.reset_states()
        h = np.zeros(256)
        done = False

        while not done:
            frame_batch[0] = state

            if playback:
                ims.append(state)
            z = V(frame_batch)[2][0]  # extract z and first from sequence

            # combine V latent space with M hidden space
            combined = np.concatenate([z, h], axis=0)

            a = controller(combined)

            state, reward, done, info = env.step(a)
            state = preprocess_images(state)

            cumulative_reward += reward

            # Current latent + action with two leading singleton axes added;
            # built once and shared by both memory-model calls below.
            memory_input = tf.expand_dims(
                tf.expand_dims(np.concatenate([z, a], 0), 0), 0
            )

            # extract hidden state from LSTM
            h = get_hidden(memory_input).squeeze()

            # get factored gaussians
            # by feeding current latent_state + action
            # NOTE(review): `get_hidden` above already ran M's first layer on this
            # same input; if that layer is stateful (M.reset_states() is called at
            # the start of the episode) its state may advance twice per step -- confirm.
            z = M(memory_input)

            # sample from factored gaussians
            # 32 = output_dims
            # 5  = num_mixtures
            # NOTE(review): this sampled z is overwritten by V's encoding at the top
            # of the next iteration, so it does not influence the rollout as written.
            # Kept because the M call above may have side effects on recurrent state.
            z = np.apply_along_axis(
                mdn.sample_from_output, 1, z[0], 32, 5, temp=1.0
            ).squeeze()
    finally:
        # Always release the environment, even if a step raises or the cell
        # is interrupted mid-episode.
        env.close()

    if playback:
        return cumulative_reward, ims
    return cumulative_reward

In [None]:
# Time a single playback rollout. perf_counter() is a monotonic, high-resolution
# clock intended for measuring elapsed intervals, unlike wall-clock time.time().
start = time.perf_counter()
r, ims = rollout(controller, playback=True)
# NOTE: despite its name, `end` holds the elapsed duration in seconds.
end = time.perf_counter() - start

In [None]:
# Cumulative reward returned by the timed rollout.
r

In [None]:
def show_state(env, step=0, name="", info="", image=None):
    """Draw one frame of the agent playing the game inside the notebook.

    The frame is drawn on matplotlib figure number 10, which is cleared and
    reused on every call; the previous cell output is cleared before the
    figure is displayed, so repeated calls look like an animation.

    Parameters
    ----------
    env : object or None
        Environment to render from when `image` is None. May be None when an
        explicit `image` is supplied.
    step : int, default 0
        Step counter shown in the title.
    name : str, default ""
        Label shown at the start of the title.
    info : str, default ""
        Extra text appended to the title.
    image : array-like or None
        Frame to show. When None the frame is taken from
        ``env.render(mode="rgb_array")[0]``.

    Raises
    ------
    ValueError
        If both `env` and `image` are None, since there is nothing to draw.
    """
    # Fail early with a clear message, before touching the figure, instead of
    # an AttributeError from calling `.render` on None.
    if image is None and env is None:
        raise ValueError(
            "show_state needs either an `env` to render or an explicit `image`"
        )

    plt.figure(10)
    plt.clf()
    if image is not None:
        im = image
    else:
        # NOTE(review): the [0] assumes render() returns a sequence/batch of
        # frames; for an env returning a single HxWx3 array this would select
        # only the first pixel row -- confirm against the env in use.
        im = env.render(mode="rgb_array")[0]
    plt.imshow(im)
    plt.title("{} | Step: {} {}".format(name, step, info))
    plt.axis('off')
    display.clear_output(wait=True)
    display.display(plt.gcf())

In [None]:
# Replay the recorded frames in order. Each recorded entry is indexed with [0]
# to obtain the image handed to imshow; the frame index is passed as `step`
# so the title reflects the position in the episode instead of always
# reading "Step: 0".
for step, frame in enumerate(ims):
    show_state(None, step=step, image=frame[0])

In [None]:
# Persist the recorded frames to AICAR.npy in the working directory
# (np.save converts the list `ims` to an array before writing).
np.save("AICAR.npy", ims)