In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%matplotlib inline
import numpy as np
from rnn.arch import RNN
from vae.arch import VAE
import matplotlib.pyplot as plt
from gym.utils import seeding
from IPython import display
import time
from model import make_model

import config

np.set_printoptions(precision=4, suppress = True)

In [3]:
def get_mixture_coef(z_pred):
    """Split raw mixture outputs into normalised log-weights, means and log-sigmas.

    Args:
        z_pred: 2-D array whose second axis is split into three equal parts
            (unnormalised log-weights, means, log standard deviations).

    Returns:
        Tuple ``(log_pi, mu, log_sigma)``; each has one third of the columns
        of ``z_pred``. ``log_pi`` is normalised so that ``exp(log_pi)`` sums
        to 1 along axis 1.
    """
    log_pi, mu, log_sigma = np.split(z_pred, 3, 1)

    # Log-sum-exp with the row maximum factored out: exponentiating the raw
    # logits directly overflows to inf for large values and turns the whole
    # row into -inf/nan.
    row_max = np.max(log_pi, axis = 1, keepdims = True)
    log_norm = row_max + np.log(np.sum(np.exp(log_pi - row_max), axis = 1, keepdims = True))
    log_pi = log_pi - log_norm

    return log_pi, mu, log_sigma

def get_pi_idx(x, pdf):
    """Pick an index from a categorical distribution by walking its running sum.

    Args:
        x: threshold compared against the running sum of ``pdf``.
        pdf: 1-D numpy array of per-outcome weights.

    Returns:
        The first index at which the running sum of ``pdf`` reaches ``x``.
        If the running sum never reaches ``x``, a uniformly random index in
        ``[0, pdf.size)`` is returned instead.
    """
    n_outcomes = pdf.size
    running_total = 0
    for k in range(n_outcomes):
        running_total += pdf[k]
        if running_total >= x:
            return k
    # The weights never added up to x: fall back to a uniform draw.
    return np.random.randint(n_outcomes)

def sample_z(mu, log_sigma):
    """Draw ``mu + exp(log_sigma) * eps`` with ``eps`` standard-normal noise.

    The noise has the shape of ``log_sigma`` and comes from numpy's global
    random state.
    """
    sigma = np.exp(log_sigma)
    noise = np.random.randn(*log_sigma.shape)
    return mu + sigma * noise


def get_z_from_rnn_output(y_pred, gaussian_mixtures=5, z_dim=32):
    """Sample a next latent vector and a binary reward from one RNN output row.

    The first ``3 * gaussian_mixtures * z_dim`` entries of ``y_pred`` are
    reshaped to ``(-1, 3 * gaussian_mixtures)`` and split into mixture
    log-weights, means and log-sigmas; the last entry of ``y_pred`` is read
    as the reward prediction.

    Args:
        y_pred: 1-D sequence/array produced by the RNN for a single step.
        gaussian_mixtures: number of mixture components per latent dimension.
        z_dim: number of latent dimensions.

    Returns:
        Tuple ``(next_z, next_reward, chosen_mu)``:
        ``next_z`` is the sampled latent (length ``z_dim``), ``next_reward``
        is 1 if the reward prediction is > 0 else 0, and ``chosen_mu`` holds
        the mean of the component picked for each latent dimension.

    Raises:
        ValueError: if ``y_pred`` has fewer than ``3 * gaussian_mixtures *
            z_dim`` entries.
    """
    d = gaussian_mixtures * z_dim
    if len(y_pred) < 3 * d:
        raise ValueError(
            'y_pred has {} entries, need at least {}'.format(len(y_pred), 3 * d)
        )

    z_pred = np.reshape(y_pred[:(3 * d)], [-1, gaussian_mixtures * 3])
    rew_pred = y_pred[-1]

    log_pi, mu, log_sigma = get_mixture_coef(z_pred)

    # Turn the log-weights into per-row probabilities. Shifting by the global
    # maximum before exponentiating only guards against overflow; each row is
    # re-normalised afterwards.
    mix_prob = np.exp(log_pi - log_pi.max())
    mix_prob /= mix_prob.sum(axis=1, keepdims=True)

    chosen_mu = np.zeros(z_dim)
    chosen_log_sigma = np.zeros(z_dim)

    # One uniform draw per latent dimension selects its mixture component.
    for j in range(z_dim):
        idx = get_pi_idx(np.random.rand(), mix_prob[j])
        chosen_mu[j] = mu[j, idx]
        chosen_log_sigma[j] = log_sigma[j, idx]

    next_z = sample_z(chosen_mu, chosen_log_sigma)

    # The reward prediction is thresholded at zero into a 0/1 reward.
    next_reward = 1 if rew_pred > 0 else 0

    return next_z, next_reward, chosen_mu

In [5]:
# Identifier shared by the rollout archive and its matching series archive.
ROLLOUT_ID = '1735079945'

# np.load on an .npz returns a lazily-read archive that keeps the file open;
# the `with` block closes it once the arrays have been pulled out.
# (`rollout_files` / `series_files` stay bound afterwards but are closed.)
with np.load('./data/rollout/{}.npz'.format(ROLLOUT_ID)) as rollout_files:
    obs_file = rollout_files['obs']
    action_file = rollout_files['action']
    reward_file = rollout_files['reward']
    done_file = rollout_files['done']

with np.load('./data/series/{}.npz'.format(ROLLOUT_ID)) as series_files:
    mu_file = series_files['mu']
    log_var_file = series_files['log_var']
    action_2_file = series_files['action']
    reward_2_file = series_files['reward']
    done_2_file = series_files['done']


In [6]:

# Agent acting in the real 'car_racing' environment, with controller
# parameters loaded from a JSON file on disk.
model = make_model()
model.make_env('car_racing')
# NOTE(review): the recorded output of this cell is a FileNotFoundError for
# this path, so the file must exist before the cells below can run.
model.load_model('./controller/car_racing.cma.4.32.current.json')

# Second agent whose environment is 'car_racing_dream'.
dream_env = make_model()
# NOTE(review): the dream env receives a brand-new make_model() instance, not
# `model`, and no load_model call on it is visible here - confirm intended.
dream_env.make_env('car_racing_dream', model = make_model())




FileNotFoundError: [Errno 2] No such file or directory: './controller/car_racing.cma.4.32.current.json'

In [None]:
# Counters and running totals consumed by the rollout loop in the next cell.
t = 0
reward = 0
total_reward = 0
total_dream_reward = 0

# Initial action; the loop below reads it as [direction, accel, brake].
action = [0,1,0]

model.reset()

# Reset the real environment first, then the dream one.
obs = model.env.reset()
dream_obs = dream_env.env.reset()

model_out = model.env.render('rgb_array')
dream_env.env.render('rgb_array')

actions0, actions1, actions2 = [], [], []

In [None]:
# Step the real environment and the dream environment side by side with the
# same controller actions, plotting five panels per step:
#   top row:    current real obs | RNN-predicted next frame | real next obs
#   bottom row: current dream obs (decoded) | (unused) | dream next obs (decoded)
#
# NOTE: `t`, `obs`, `dream_obs`, `action`, `reward` and the running totals come
# from the initialisation cell above; re-run that cell before re-running this
# one, otherwise the loop resumes from the previous state (or does nothing).

MAX_STEPS = 100  # upper bound on `t` for this rollout

action_direction = []
action_accel = []
action_brake = []

rewards = []
pred_rewards = []
dream_rewards = []

while t < MAX_STEPS:

    f = plt.figure()

    if obs.shape == model.vae.input_dim: ### running in real environment
        obs = config.adjust_obs(obs)
        reward = config.adjust_reward(reward)

    # CURRENT REAL IMAGE
    ax = f.add_subplot(2, 3, 1)
    ax.imshow(obs)
    ax.set_title('real (t)')

    # CURRENT DREAM IMAGE
    ax = f.add_subplot(2, 3, 4)
    decoded_dream_obs = model.vae.decoder.predict(np.array([dream_obs]))[0]
    ax.imshow(decoded_dream_obs)
    ax.set_title('dream (t)')

    vae_encoded_obs = model.update(obs, t)

    # One RNN step on [z, action, reward] plus the current hidden/cell state.
    input_to_rnn = [
        np.array([[np.concatenate([vae_encoded_obs, action, [reward]])]]),
        np.array([model.hidden]),
        np.array([model.cell_values]),
    ]

    out = model.rnn.forward.predict(input_to_rnn)

    y_pred = out[0][0][0]
    model.hidden = out[1][0]
    model.cell_values = out[2][0]

    next_z, next_reward, chosen_mu = get_z_from_rnn_output(y_pred)

    recon_next_z = model.vae.decoder.predict(np.array([next_z]))[0]

    # PREDICTED NEXT IMAGE (DECODED FROM RNN)
    ax = f.add_subplot(2, 3, 2)
    ax.imshow(recon_next_z)
    ax.set_title('RNN predicted (t+1)')

    controller_obs = np.concatenate([vae_encoded_obs, model.hidden])

    action = model.get_action(controller_obs, t=0, add_noise=0)

    action_direction.append(action[0])
    action_accel.append(action[1])
    action_brake.append(action[2])

    print('Time', t)
    print('Action', action)
    print('Predicted New Reward', next_reward)

    pred_rewards.append(next_reward)

    obs, reward, done, _ = model.env.step(action)

    # REAL NEXT OBS
    ax = f.add_subplot(2, 3, 3)
    ax.imshow(obs)
    ax.set_title('real (t+1)')
    total_reward += reward

    print('New reward', reward)
    print('Total reward', total_reward)

    rewards.append(reward)

    dream_obs, dream_reward, dream_done, _ = dream_env.env.step(action)

    decoded_dream_obs = model.vae.decoder.predict(np.array([dream_obs]))[0]

    # DECODED DREAM NEXT IMAGE
    ax = f.add_subplot(2, 3, 6)
    ax.imshow(decoded_dream_obs)
    ax.set_title('dream (t+1)')
    total_dream_reward += dream_reward

    print('Dream reward', dream_reward)
    print('Dream total reward', total_dream_reward)

    dream_rewards.append(dream_reward)

    display.display(f)
    # Close the figure once shown: otherwise every iteration leaves a figure
    # open, memory grows with the step count and the inline backend renders
    # all of them a second time when the cell finishes.
    plt.close(f)
    time.sleep(0.1)

    t += 1

    # Stepping an environment after it reported `done` is not meaningful, so
    # stop when the real episode ends. `dream_done` is not acted on (as before).
    if done:
        print('Episode finished at t =', t)
        break

In [None]:
# Overlay the recorded per-step actions and rewards on a single axis.
plt.figure(figsize = (15,6))

# Each series carries its own legend label, so the label cannot drift out of
# step with the plotted line.
for series, series_label in [
    (action_direction, 'action_direction'),
    (action_accel, 'action_accel'),
    (action_brake, 'action_brake'),
    (rewards, 'rewards'),
    (pred_rewards, 'pred_rewards'),
    # (dream_rewards, 'dream_rewards'),
]:
    plt.plot(series, label = series_label)

plt.legend()
plt.show()