In [1]:
# Configure the TensorFlow session that Keras will use, so the notebook
# avoids overusing memory.

import tensorflow as tf
tf_config = tf.ConfigProto()
# allow_growth asks TensorFlow to grow its GPU memory allocation on demand
# rather than reserving it all up front.
tf_config.gpu_options.allow_growth = True
sess = tf.Session(config=tf_config)
from keras import backend as K
# Hand the configured session to the Keras backend.
K.set_session(sess)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
#Importing the VAE and RNN.
import os
import sys

# Make the parent of the current working directory importable, so the
# project packages imported below can be found.
nb_dir = os.path.dirname(os.getcwd())
print(nb_dir)
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
    
#Importing the VAE
from VAE.world_model_vae import VAE
from RNN.world_model_rnn import RNN
import mdn

/home/kaiolae/workspace/world_models/WorldModels


In [3]:
# Size of one latent vector; passed to mdn.sample_from_output as the output dimension.
LATENT_SPACE_DIMENSIONALITY = 16
# Number of mixture components passed to mdn.sample_from_output.
NUM_MIXTURES = 5

In [4]:
import matplotlib.pyplot as plt
from matplotlib import animation
from IPython.display import display, HTML


def plot_movie_mp4(image_array):
    """Render a sequence of image frames as an inline HTML5 video.

    Args:
        image_array: Non-empty, indexable sequence of frames. Frame 0 is
            drawn first; frame ``i`` is pushed into the image for step ``i``.

    Returns:
        None. The encoded video is displayed in the notebook output.
    """
    dpi = 2.0
    fig = plt.figure(figsize=(1, 1), dpi=dpi)
    try:
        # figimage places the frame directly on the figure (no axes).
        im = plt.figimage(image_array[0])

        def animate(i):
            im.set_array(image_array[i])
            return (im,)

        anim = animation.FuncAnimation(fig, animate, frames=len(image_array))
        video_html = anim.to_html5_video()
    finally:
        # The figure is only needed while encoding. Closing it stops figures
        # piling up across calls and avoids a stray static figure under the video.
        plt.close(fig)
    display(HTML(video_html))

In [5]:
# Build the VAE and the RNN and restore their trained weights from disk.
VAE_WEIGHTS_PATH = "../models/final_full_vae_weights.h5"
RNN_WEIGHTS_PATH = "../rnn-model-from-all-data/rnn_trained_model.h5"

vae = VAE()
vae.set_weights(VAE_WEIGHTS_PATH)

rnn = RNN(decoder_mode=True)
rnn.set_weights(RNN_WEIGHTS_PATH)

# Second RNN built the same way, but no weights are loaded into it.
untrained_rnn = RNN(decoder_mode=True)

VAE init
Shape before flattening: (None, 16, 16, 32)
KL Shape: (None,)
Xent shape: ()
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 64, 64, 3)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 64, 64, 3)    39          encoder_input[0][0]              
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 32, 32, 32)   416         conv2d_1[0][0]                   
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 16, 16, 32)   4128        conv2d_2[0][0]                   
_______________________

In [32]:
def condition_and_dream(model, real_video_latents, n_mixtures, dream_length, temp=1.0,
                        latent_dim=None):
    """Condition the network on a real input sequence, then dream new latents.

    To be used with a decoder model: 1 input, 1 output, stateful.

    Args:
        model: Object with a ``predict`` method that takes one timestep shaped
            (batch=1, seq_len=1, features) and returns MDN parameters.
        real_video_latents: Non-empty iterable of per-timestep input vectors
            that are fed through the model, in order, before dreaming.
            (The caller in this notebook passes latent + action rows.)
        n_mixtures: Number of mixture components of the MDN output.
        dream_length: Number of additional latents to dream after conditioning.
        temp: Sampling temperature forwarded to ``mdn.sample_from_output``.
        latent_dim: Size of a sampled latent vector. Defaults to the notebook
            constant ``LATENT_SPACE_DIMENSIONALITY``.

    Returns:
        Array with ``dream_length + 1`` rows: the latent sampled after the
        conditioning sequence, followed by each dreamed latent.

    Raises:
        ValueError: If ``real_video_latents`` is empty.
    """
    if latent_dim is None:
        latent_dim = LATENT_SPACE_DIMENSIONALITY

    # Condition on all inputs before starting to dream. Only the parameters of
    # the final step are needed, so sampling is deferred until after the loop.
    mixture_params = None
    for latent in real_video_latents:
        # Network expects 3D input, with seq-len 1, batch-size 1.
        mixture_params = model.predict(np.array([[latent]]))
    if mixture_params is None:
        raise ValueError("real_video_latents must contain at least one timestep")

    previous_latent = mdn.sample_from_output(mixture_params[0], latent_dim,
                                             n_mixtures, temp=temp)
    # The final output after the real sequence has passed through.
    output = [previous_latent[0]]

    # Dream further with the conditioned network, feeding each sample back in.
    for _ in range(dream_length):
        # Adding dummy action to previous_latent. TODO Real action?
        rnn_input = np.append(previous_latent[0], 0)
        params = model.predict(np.array([[rnn_input]]))
        previous_latent = mdn.sample_from_output(params[0], latent_dim,
                                                 n_mixtures, temp=temp)
        output.append(previous_latent[0])
    return np.array(output)

In [6]:
#Get a start video
#Getting data to feed into the VAE and RNN
import numpy as np

# Index of the recorded sequence used as the start video.
SEQUENCE_INDEX = 6

# np.load keeps an .npz archive open until it is closed; the context manager
# releases the file handle once both arrays have been read into memory.
with np.load("../rnn-data/rnn_training_data.npz") as data:
    action_file = data['action']
    latent_file = data['latent']

single_action_sequence = action_file[SEQUENCE_INDEX]
single_latent_sequence = latent_file[SEQUENCE_INDEX]
print("Actions length: ", len(single_action_sequence))
print("Latent vectors length: ", len(single_latent_sequence))




#Next: decode all latent vectors of the selected sequence (300 here, per the lengths printed above) and store them as a video.
#Then visualize the video.

Actions length:  300
Latent vectors length:  300


In [7]:
#Decode a sequence with the VAE and visualize it
def decode_and_visualize(latent_vector_sequence):
    """Decode a sequence of latent vectors with the VAE decoder and play it as a video.

    Args:
        latent_vector_sequence: Sequence of latent vectors; it is converted to
            an array and passed to ``vae.decoder.predict`` in one call.
    """
    latent_batch = np.array(latent_vector_sequence)
    decoded_frames = vae.decoder.predict(latent_batch)
    plot_movie_mp4(decoded_frames)

### Visualizing one sequence of latent vectors from the training data

In [8]:
# Play back the selected training sequence as reconstructed by the VAE decoder.
decode_and_visualize(single_latent_sequence)

### Starting with one z-vector, unroll it N steps into the future, storing all predicted z-vectors

In [36]:
#For now, testing with REAL z- and a-input each time. Later: test longer-term prediction.
#Each RNN input row is one latent vector with that timestep's action appended.
rnn_input_sequence = np.column_stack(
    (single_latent_sequence, single_action_sequence[:len(single_latent_sequence)])
)

#Slicing the input sequence into overlapping pieces of length SEQ_SIZE.
#Input: last SEQ_SIZE timesteps. Output: next predicted image.
#TODO: Set up something like the startrek RNN, which could decode
#1-to-1 by having internal state.
step_size = 1
SEQ_SIZE = 30
rnn_input_data = np.array([
    rnn_input_sequence[start:start + SEQ_SIZE]
    for start in range(0, len(rnn_input_sequence) - SEQ_SIZE, step_size)
])
print("Input shape: ", rnn_input_data.shape)

#Number of latent vectors to dream after conditioning on the real sequence.
DREAM_LENGTH = 100

rnn.model.reset_states()
print("RNN input seq shape: ", rnn_input_sequence.shape)
dreamed_latents = condition_and_dream(rnn.model, rnn_input_sequence, NUM_MIXTURES, DREAM_LENGTH)
#NOTE: batch prediction over the windows is disabled; later cells that read
#mdn_outputs will fail until it is restored.
#mdn_outputs = rnn.model.predict(rnn_input_data)
print("Dream shape: ", dreamed_latents.shape)

#Dreaming with the untrained model
untrained_rnn.model.reset_states()
dreamed_untrained_latents = condition_and_dream(untrained_rnn.model, rnn_input_sequence, NUM_MIXTURES, DREAM_LENGTH)

Input shape:  (270, 30, 17)
RNN input seq shape:  (300, 17)
Dream shape:  (101, 16)


In [37]:
# Play back the dream of the trained RNN first, then the dream of the untrained RNN.
for dream in (dreamed_latents, dreamed_untrained_latents):
    decode_and_visualize(dream)

In [None]:
#Sampling one latent vector from each MDN output in mdn_outputs.
#NOTE(review): mdn_outputs is not assigned anywhere in the visible notebook
#(its only assignment is commented out), so this cell raises NameError on a
#fresh kernel until that prediction step is restored.
#TODO Decide what sampling temperature to use. 1 seems reasonable?
generated_latent_vectors = []
for mdn_timestep in mdn_outputs:
    sampled_latent = mdn.sample_from_output(mdn_timestep,LATENT_SPACE_DIMENSIONALITY,
                                           NUM_MIXTURES, temp=1.0)
    # Keep only the first row of the sample.
    generated_latent_vectors.append(sampled_latent[0])
generated_latent_vectors = np.array(generated_latent_vectors)
print("Sampled latent vectors shape: ", generated_latent_vectors.shape)

In [None]:
#Testing an untrained RNN to see the effect of training.
#NOTE(review): untrained_rnn was built with decoder_mode=True; confirm that it
#accepts the batched SEQ_SIZE-long windows in rnn_input_data.
untrained_outputs = untrained_rnn.model.predict(rnn_input_data)
#Draw one sample per MDN output and keep the first row of each sample.
untrained_latent_vectors = np.array([
    mdn.sample_from_output(mdn_params, LATENT_SPACE_DIMENSIONALITY,
                           NUM_MIXTURES, temp=1.0)[0]
    for mdn_params in untrained_outputs
])
print("Untrained latent vectors shape: ", untrained_latent_vectors.shape)

In [35]:
#Decoding the sampled latent vectors into images, and visualizing
#First the real training sequence, for reference.
decode_and_visualize(single_latent_sequence)
#decode_and_visualize(generated_latent_vectors)
#NOTE(review): untrained_latent_vectors is only created by the untrained-RNN
#sampling cell; if that cell has not been run this raises NameError (as in the
#saved output) and the call below it never executes.
decode_and_visualize(untrained_latent_vectors)
#Visualizing the dream
decode_and_visualize(dreamed_latents)


NameError: name 'untrained_latent_vectors' is not defined

In [None]:
#Show the actual frame and the frame predicted for it side by side.
TIMESTEP = 100
decoded_current = vae.decoder.predict(single_latent_sequence)
decoded_future = vae.decoder.predict(generated_latent_vectors)
#Decoding the exact input we gave at step TIMESTEP (the last column is dropped
#before decoding, leaving only the latent values).
decoded_input_data = vae.decoder.predict(rnn_input_data[TIMESTEP][:,:-1])

#The future is SEQ_SIZE timesteps ahead
fig, (ax_actual, ax_predicted) = plt.subplots(1, 2, figsize=(6, 3))
ax_actual.imshow(decoded_current[TIMESTEP+SEQ_SIZE])
ax_actual.set_title("Current image")
ax_predicted.imshow(decoded_future[TIMESTEP])
ax_predicted.set_title("Predicted future")
for ax in (ax_actual, ax_predicted):
    ax.axis("off")
plt.show()

#All frames of the input window in one grid instead of one figure per frame.
n_cols = 10
n_rows = max(1, -(-len(decoded_input_data) // n_cols))  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols, n_rows), squeeze=False)
fig.suptitle("Actual RNN input")
for ax in axes.ravel():
    ax.axis("off")
for ax, img in zip(axes.ravel(), decoded_input_data):
    ax.imshow(img)
plt.show()

In [None]:
#How can I plot the current and predicted latent vectors side-
#by-side?
#For now: print the last input row of each window next to the latent vector
#sampled for that window.
for timestep, predicted_latent in enumerate(generated_latent_vectors):
    current_input = rnn_input_data[timestep][-1]
    print("Current: ", current_input)
    print("Predicted: ", predicted_latent)