In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Sequential
import tensorflow.keras.backend as K
import numpy as np
import os
from utils import preprocess_images

In [None]:
from models import Encoder as buildVision
from models import M as buildMemory

In [None]:
import mdn

In [None]:
# Build the vision model (`Encoder` from models, imported as buildVision).
# rollout() calls it on a batch of frames to obtain the latent `z`.
V = buildVision()

In [None]:
# Build the memory model (`M` from models, imported as buildMemory).
M = buildMemory()
# Backend function mapping the first layer's input to that same layer's output.
# rollout() uses it to obtain `h` (described there as the LSTM hidden state).
get_hidden = K.function(M.layers[0].input, M.layers[0].output)


In [None]:
# some_z = np.load('./sausage/z_states/z_state0_1005.npy')

# some_a = np.array([1,0,0])

# combined = np.concatenate([some_z, some_a], axis=0)

# #inputs = np.zeros((128, *combined.shape))
# #inputs[0] = combined

# #inputs.shape

# M.reset_states()

# M.layers[0]

# foo = get_hidden(tf.expand_dims(tf.expand_dims(combined, 0), 0))

# bar = M(tf.expand_dims(c, 0))

# np.apply_along_axis(mdn.sample_from_output, 1, bar[0], 32, 5, temp=1.0).shape

In [None]:
class Controller:
    """Single-layer linear policy: ``action = squash(obs @ W)``.

    ``W`` has shape ``(input_size, output_size)`` and is initialised from a
    standard normal distribution. After the matrix product the first three
    action components are squashed independently:

    * component 0 -> ``tanh``                 (range [-1, 1])
    * component 1 -> ``sigmoid``              (range [0, 1])
    * component 2 -> ``tanh`` clipped to [0, 1]

    ``output_size`` must therefore be at least 3.
    """

    def __init__(self, input_size, output_size):
        """Create a controller with randomly initialised weights."""
        self._in = input_size
        self._out = output_size
        self.W = np.random.randn(input_size, output_size)

    def clip(self, x, lo=0.0, hi=1.0):
        """Clamp ``x`` element-wise into the closed interval ``[lo, hi]``."""
        return np.minimum(np.maximum(x, lo), hi)

    def sigmoid(self, x):
        """Logistic function ``1 / (1 + exp(-x))``.

        Evaluated as ``0.5 * (1 + tanh(x / 2))``, which is mathematically
        identical but never overflows: the naive form computes ``exp(-x)``,
        which overflows (RuntimeWarning) for large negative ``x``.
        """
        return 0.5 * (1.0 + np.tanh(0.5 * x))

    def __call__(self, obs):
        """Map an observation to an action.

        Args:
            obs: 1-D array of length ``input_size``, or an array whose last
                axis has length ``input_size`` (a batch of observations).

        Returns:
            Array with the same leading shape as ``obs`` and a last axis of
            length ``output_size``; components 0-2 are squashed as described
            on the class.
        """
        action = np.dot(obs, self.W)

        # Index the last axis so that each action *component* is squashed;
        # for a 1-D observation this is exactly action[0], action[1], action[2].
        action[..., 0] = np.tanh(action[..., 0])
        action[..., 1] = self.sigmoid(action[..., 1])
        action[..., 2] = self.clip(np.tanh(action[..., 2]))

        return action

    def set_weights(self, W):
        """Replace the weights with ``W`` reshaped to the current ``W.shape``.

        ``W`` is assumed to be flat with ``input_size * output_size`` entries;
        ``np.reshape`` raises ``ValueError`` if the sizes do not match.
        """
        self.W = np.reshape(W, self.W.shape)

    def randomly_init(self):
        """Re-draw all weights from a standard normal distribution."""
        self.W = np.random.randn(*self.W.shape)

    @property
    def shape(self):
        """Shape of the weight matrix, ``(input_size, output_size)``."""
        return self.W.shape

In [None]:
# Controller input is 32 + 256 values: rollout() concatenates the latent `z`
# with the 256-element `h`. The output is the 3-component action.
controller = Controller(32+256, 3)

In [None]:
import gym

In [None]:
# Create the CarRacing-v0 gym environment; rollout() uses this global `env`.
env = gym.make("CarRacing-v0")

In [None]:
# Reset the environment once, run the first observation through
# preprocess_images, then close the environment again.
# This module-level `state` is not read by rollout(), which keeps its own local `state`.
state = preprocess_images(env.reset())
env.close()

In [None]:
def rollout(controller):
    """Play one episode in the global ``env`` with ``controller`` and return the summed reward.

    Each step encodes the current frame with ``V``, concatenates that latent
    ``z`` with the memory model's ``h`` and lets ``controller`` choose the
    action. After stepping the environment, ``M`` is fed ``[z, a]``, a next
    latent is sampled from its mixture output, and ``h`` is refreshed through
    ``get_hidden``.

    Args:
        controller: callable mapping the concatenated ``[z, h]`` vector to an
            action accepted by ``env.step``.

    Returns:
        The cumulative reward collected until the environment reports ``done``.

    The environment is closed on exit, including when a step raises.
    Frame recording (``ims``) is not performed by this function.
    """
    state = preprocess_images(env.reset())

    M.reset_states()
    h = np.zeros(256)  # matches the 256-element hidden part of the controller input
    done = False
    cumulative_reward = 0

    # V is called on a 128-frame batch of which only slot 0 carries the real
    # frame (presumably 128 is V's fixed batch size -- TODO confirm).
    # The buffer is allocated once; slot 0 is fully overwritten every step and
    # the remaining slots stay zero, so there is no need to rebuild it per step.
    _state = np.zeros((128, 64, 64, 3))

    try:
        while not done:
            _state[0] = state
            z = V(_state)[0][0]  # extract first from batch and sequence

            # combine V latent space with M hidden space
            combined = np.concatenate([z, h], axis=0)

            a = controller(combined)

            state, reward, done, info = env.step(a)
            state = preprocess_images(state)

            cumulative_reward += reward

            # get factored gaussians
            # by feeding current latent_state + action
            z = M(tf.expand_dims(tf.expand_dims(np.concatenate([z, a]), 0), 0))

            # sample from factored gaussians
            # 32 = output_dims
            # 5  = num_mixtures
            z = np.apply_along_axis(mdn.sample_from_output, 1, z[0], 32, 5, temp=1.0).squeeze()

            # extract hidden state from LSTM
            # NOTE(review): this runs M's first layer a second time in the same
            # step, now on the *sampled* latent; if that layer is stateful its
            # state advances twice per environment step -- confirm intended.
            h = get_hidden(tf.expand_dims(tf.expand_dims(np.concatenate([z, a], 0), 0), 0)).squeeze()
    finally:
        # Always release the environment, even if a step raised.
        env.close()

    return cumulative_reward

In [None]:
from es import SimpleGA, CMAES, PEPG, OpenES

In [None]:
NPARAMS = controller.shape[0] * controller.shape[1]  # total number of controller weights (W flattened)
NPOPULATION = 26    # population size passed to the solver
MAX_ITERATION = 250 # number of generations test_solver runs

In [None]:
def fit_func(params):
    """Fitness of one candidate: load ``params`` into the global controller and run a rollout.

    Args:
        params: flat weight vector, reshaped by ``controller.set_weights``.

    Returns:
        The cumulative reward reported by ``rollout``.
    """
    controller.set_weights(params)
    return rollout(controller)

In [None]:
def test_solver(solver):
    history = []
    for j in range(MAX_ITERATION):
        solutions = solver.ask()
        fitness_list = np.zeros(solver.popsize)
        for i in range(solver.popsize):
            fitness_list[i] = fit_func(solutions[i])
        solver.tell(fitness_list)
        result = solver.result() # first element is the best solution, second element is the best fitness
        history.append(result[1])
        if (j+1) % 100 == 0:
            print("fitness at iteration", (j+1), result[1])
    print("local optimum discovered by solver:\n", result[0])
    print("fitness score at this local optimum:", result[1])
    return history, result

In [None]:
# defines OpenAI's ES algorithm solver. Note that we needed to anneal the sigma parameter
oes = OpenES(NPARAMS,                  # number of model parameters
            sigma_init=0.5,            # initial standard deviation
            sigma_decay=0.999,         # decay factor for the standard deviation (< 1, i.e. sigma is annealed as noted above)
            learning_rate=0.1,         # learning rate
            learning_rate_decay = 1.0, # learning-rate decay factor; presumably 1.0 means no annealing -- confirm in es.py
            popsize=NPOPULATION,       # population size
            antithetic=False,          # whether to use antithetic sampling
            weight_decay=0.00,         # weight decay coefficient
            rank_fitness=False,        # use rank rather than fitness numbers
            forget_best=False)

In [None]:
# Run the evolution loop. `oes_history` holds the best fitness per generation;
# `result` is the solver's final result tuple (best solution first, best fitness second).
oes_history, result = test_solver(oes)

In [None]:
# `result` is the tuple returned by solver.result(): its first element is the
# best solution and its second the best fitness. Only the flat parameter
# vector can be reshaped into the controller's weight matrix.
controller.set_weights(result[0])

In [None]:
# Create the output directory first so that a finished training run is not
# lost to a FileNotFoundError at save time.
os.makedirs('./sausage/weights', exist_ok=True)
np.save('./sausage/weights/C_weights.npy', controller.W)

In [None]:
# Close the gym environment.
env.close()

In [None]:
import matplotlib.pyplot as plt
from IPython import display

In [None]:
def show_state(env, step=0, name="", info="", image=None):
    """Draw a single frame inline in the notebook, replacing the previous cell output.

    Args:
        env: environment to render from; only used when ``image`` is None.
        step: step counter shown in the title.
        name: label shown at the start of the title.
        info: extra text appended to the title.
        image: frame to display. When None, element 0 of
            ``env.render(mode="rgb_array")`` is displayed instead.
    """
    # Always draw into figure number 10 and start from a blank canvas.
    plt.figure(10)
    plt.clf()

    frame = image if image is not None else env.render(mode="rgb_array")[0]

    plt.imshow(frame)
    plt.title("{} | Step: {} {}".format(name, step, info))
    plt.axis('off')

    # Swap the previous output for the freshly drawn figure.
    display.clear_output(wait=True)
    display.display(plt.gcf())

In [None]:
# Display every recorded frame; each element is indexed with [0] before being shown.
# NOTE(review): `ims` is not assigned in any visible cell -- the frame
# collection inside rollout() is commented out -- so this cell raises
# NameError on a fresh kernel unless `ims` is produced elsewhere.
for i in ims:
    show_state(None, image=i[0])

In [None]:
# Save the recorded frames to AICAR.npy.
# NOTE(review): depends on `ims`, which no visible cell defines.
np.save("AICAR.npy", ims)