In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Sequential
import tensorflow.keras.backend as K
import numpy as np
import os
from utils import preprocess_images

In [2]:
from models import V as buildVision
from models import V_inverse as Decoder
from models import M as buildMemory

In [3]:
import mdn

In [4]:
from utils import load_folder

In [5]:
import matplotlib.pyplot as plt

In [6]:
# Instantiate the vision encoder (models.V, imported as buildVision) and the
# matching decoder (models.V_inverse, imported as Decoder).
V = buildVision()
V_inv = Decoder()

In [7]:
# Load saved encoder/decoder weights from the weights/2019.12.07 checkpoint files
# (paths are relative to the notebook's working directory).
V.load_weights('weights/2019.12.07/encoder_weights')
V_inv.load_weights('weights/2019.12.07/decoder_weights')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fd5dc77d2d0>

In [8]:
# Print the encoder's layer-by-layer summary (output shapes and parameter counts).
V.summary()

Model: "encoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      [(None, 64, 64, 3)]  0                                            
__________________________________________________________________________________________________
enc_conv1 (Conv2D)              (None, 31, 31, 32)   1568        encoder_input[0][0]              
__________________________________________________________________________________________________
enc_conv2 (Conv2D)              (None, 14, 14, 64)   32832       enc_conv1[0][0]                  
__________________________________________________________________________________________________
enc_conv3 (Conv2D)              (None, 6, 6, 128)    131200      enc_conv2[0][0]                  
____________________________________________________________________________________________

In [9]:
# Print the decoder's layer-by-layer summary (output shapes and parameter counts).
V_inv.summary()

Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
decoder_input (InputLayer)   [(None, 32)]              0         
_________________________________________________________________
dec_fc (Dense)               (None, 1024)              33792     
_________________________________________________________________
reshape_1 (Reshape)          (None, 1, 1, 1024)        0         
_________________________________________________________________
dec_deconv1 (Conv2DTranspose (None, 5, 5, 128)         3276928   
_________________________________________________________________
dec_deconv2 (Conv2DTranspose (None, 13, 13, 64)        204864    
_________________________________________________________________
dec_deconv3 (Conv2DTranspose (None, 30, 30, 32)        73760     
_________________________________________________________________
dec_deconv4 (Conv2DTranspose (None, 64, 64, 3)         3459

In [10]:
# Build the memory model from models.M.
# NOTE(review): presumably buildMemory restores the MDN-RNN weights from this path — confirm in models.M.
M = buildMemory('weights/2019.12.07/mdn_rnn_weights')
# Backend function that maps the input of M's first layer to that layer's output;
# rollout() calls it to read the recurrent layer's output vector.
get_hidden = K.function(M.layers[0].input, M.layers[0].output)

In [None]:
# tf.keras.backend.set_floatx('float64')

In [11]:
# Print the memory model's layer-by-layer summary (output shapes and parameter counts).
M.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, None, 256)         299008    
_________________________________________________________________
mdn (MDN)                    (None, None, 325)         83525     
Total params: 382,533
Trainable params: 382,533
Non-trainable params: 0
_________________________________________________________________


In [12]:
class Controller():
    """Single-layer linear policy: ``action = squash(obs @ W)``.

    The first three action components are squashed independently: index 0
    with tanh, index 1 with a sigmoid and index 2 with tanh clipped to
    [0, 1], so ``output_size`` must be at least 3.
    NOTE(review): presumably these are steering / gas / brake for CarRacing —
    confirm against the environment's action space.
    """

    def __init__(self, input_size, output_size):
        self._in = input_size
        self._out = output_size
        # Weights start as standard-normal noise with shape (input_size, output_size).
        self.W = np.random.randn(input_size, output_size)

    def clip(self, x, lo=0.0, hi=1.0):
        """Clamp ``x`` element-wise into ``[lo, hi]``."""
        return np.minimum(np.maximum(x, lo), hi)

    def sigmoid(self, x):
        """Logistic function ``1 / (1 + exp(-x))`` computed without overflow.

        ``np.logaddexp(0, -x)`` evaluates ``log(1 + exp(-x))`` stably, so very
        negative inputs return 0.0 instead of overflowing inside ``exp``.
        """
        return np.exp(-np.logaddexp(0, -x))

    def __call__(self, obs):
        """Map an observation to an action.

        Args:
            obs: array-like whose last axis has ``input_size`` entries; either
                a single vector or a batch of vectors.

        Returns:
            Float array whose last axis has ``output_size`` entries, with
            components 0, 1 and 2 squashed as described on the class.
        """
        # Force a float result so the in-place squashing below is never
        # truncated when both the weights and the observation are integers.
        action = np.asarray(np.dot(obs, self.W), dtype=float)

        # Index the last axis so a batch of observations is squashed per
        # component rather than per row.
        action[..., 0] = np.tanh(action[..., 0])
        action[..., 1] = self.sigmoid(action[..., 1])
        action[..., 2] = self.clip(np.tanh(action[..., 2]))

        return action

    def set_weights(self, W):
        """Replace the weights with ``W`` reshaped to the current weight shape.

        ``W`` may be flat or already shaped; it is converted to float.

        Raises:
            ValueError: if ``W`` does not hold exactly as many values as the
                current weight matrix.
        """
        self.W = np.reshape(np.asarray(W, dtype=float), self.W.shape)

    def randomly_init(self):
        """Re-draw the weights from a standard normal, keeping the shape."""
        self.W = np.random.randn(*self.W.shape)

    @property
    def shape(self):
        """Shape of the weight matrix, ``(input_size, output_size)``."""
        return self.W.shape

In [13]:
# Controller input = 32-d latent from V concatenated with the 256-d recurrent
# output from M (see rollout); output = 3 action components.
controller = Controller(32+256, 3)

In [14]:
import gym

In [15]:
# Create the CarRacing-v0 Gym environment; rollout() uses this global `env`.
env = gym.make("CarRacing-v0")

In [16]:
# Reset once and preprocess the first frame, then close the environment.
# The resulting global `state` is not read by any other visible cell
# (rollout() resets the environment itself).
state = preprocess_images(env.reset())
env.close()

Track generation: 1208..1514 -> 306-tiles track


In [17]:
def rollout(controller):
    """Play one episode in the global ``env`` with ``controller``.

    Each step encodes the current frame with ``V`` into a latent ``z``,
    concatenates ``z`` with the recurrent vector ``h`` and passes the result
    to ``controller`` to obtain an action, steps the environment, then
    refreshes ``h`` from the memory model ``M``.

    Args:
        controller: callable mapping the concatenated ``[z, h]`` vector to an
            action accepted by ``env.step``.

    Returns:
        Sum of the per-step rewards collected until the environment reports
        ``done``.
    """
    #ims = []
    state = preprocess_images(env.reset())

    M.reset_states()
    h = np.zeros(256)
    done = False
    cumulative_reward = 0

    # V is fed a 128-row batch of which only row 0 is ever filled.
    # NOTE(review): presumably V needs this fixed batch size — confirm.
    # The buffer is allocated once and reused; the other rows stay zero.
    _state = np.zeros((128, 64, 64, 3))

    try:
        while not done:
            _state[0] = state
            #ims.append(state)
            z = V(_state)[2][0] #extract first from batch and sequence

            # combine V latent space with M hidden space
            combined = np.concatenate([z, h], axis=0)

            a = controller(combined)

            state, reward, done, _ = env.step(a)
            state = preprocess_images(state)

            cumulative_reward += reward

            # get factored gaussians
            # by feeding current latent_state + action
            # NOTE(review): M's first layer is run here and again through
            # get_hidden below; if that layer is stateful this advances its
            # state twice per step — confirm this is intended.
            z = M(tf.expand_dims(tf.expand_dims(np.concatenate([z, a]), 0), 0))

            # sample from factored gaussians
            # 32 = output_dims
            # 5  = num_mixtures
            z = np.apply_along_axis(mdn.sample_from_output, 1, z[0], 32, 5, temp=1.0).squeeze()

            # extract hidden state from LSTM
            h = get_hidden(tf.expand_dims(tf.expand_dims(np.concatenate([z, a], 0), 0), 0)).squeeze()
    finally:
        # Close the environment even when the episode is interrupted or raises.
        env.close()
    return cumulative_reward#, ims

In [21]:
import time

In [22]:
# Time a single rollout with the current `controller`.
start = time.time()
rollout(controller)
# NOTE: despite its name, `end` holds the elapsed wall-clock seconds, not an end timestamp.
end = time.time() - start

Track generation: 1098..1385 -> 287-tiles track


In [30]:
# Rough whole-hour estimate for the full search: seconds per rollout x 50 x 10
# rollouts, floor-divided by 3600. The literals match MAX_ITERATION and
# NPOPULATION, which are defined in a later cell.
end * 50 * 10 // 3600

11.0

In [31]:
from es import SimpleGA, CMAES, PEPG, OpenES

In [32]:
# Total number of entries in the controller's weight matrix (rows x columns).
NPARAMS = controller.shape[0] * controller.shape[1]
NPOPULATION = 10    # population size passed to the solver (candidates evaluated per generation)
MAX_ITERATION = 50 # number of generations test_solver runs the solver for

In [33]:
def fit_func(params):
    """Score one candidate parameter vector.

    Loads ``params`` into the global ``controller`` and returns the value
    produced by a single ``rollout`` with it.
    """
    controller.set_weights(params)
    return rollout(controller)

In [34]:
def test_solver(solver):
    """Run ``solver`` for ``MAX_ITERATION`` generations, scoring with ``fit_func``.

    Each generation asks the solver for candidates, evaluates the first
    ``solver.popsize`` of them with ``fit_func`` and reports the fitness
    values back through ``solver.tell``.

    Args:
        solver: object exposing ``ask()``, ``tell(fitness_list)``,
            ``result()`` and a ``popsize`` attribute.

    Returns:
        ``(history, result)`` where ``history`` holds ``result[1]`` after each
        generation and ``result`` is the last value of ``solver.result()``,
        or ``None`` when no generation was run.
    """
    history = []
    # Stays None when the loop body never runs (MAX_ITERATION == 0), so the
    # summary and return below cannot hit an unbound name.
    result = None
    for j in tqdm(range(MAX_ITERATION)):
        solutions = solver.ask()
        fitness_list = np.zeros(solver.popsize)
        for i in range(solver.popsize):
            fitness_list[i] = fit_func(solutions[i])
        solver.tell(fitness_list)
        result = solver.result() # first element is the best solution, second element is the best fitness
        history.append(result[1])
        if (j+1) % 100 == 0:
            print("fitness at iteration", (j+1), result[1])
    if result is None:
        print("solver was not run: MAX_ITERATION is", MAX_ITERATION)
    else:
        print("local optimum discovered by solver:\n", result[0])
        print("fitness score at this local optimum:", result[1])
    return history, result

In [36]:
# defines OpenAI's ES algorithm solver. Note that we needed to anneal the sigma parameter
# Configure the OpenES solver over the controller's flattened weights.
# NOTE(review): the parameter notes below are inferred from the argument names
# and values; confirm the exact semantics against es.OpenES.
oes = OpenES(NPARAMS,                  # number of model parameters
                sigma_init=0.5,            # initial standard deviation
                sigma_decay=0.999,         # decay factor below 1.0, so sigma is presumably annealed each generation
                learning_rate=0.1,         # learning rate (presumably for the parameter update, not the standard deviation — confirm)
                learning_rate_decay = 1.0, # a decay of 1.0 presumably leaves the learning rate un-annealed
                popsize=NPOPULATION,       # population size
                antithetic=False,          # antithetic sampling disabled
                weight_decay=0.00,         # weight decay coefficient (zero here)
                rank_fitness=False,        # use raw fitness numbers rather than ranks
                forget_best=False)         # NOTE(review): presumably keeps the best-so-far solution — confirm

In [38]:
from tqdm import tqdm

In [None]:
# Run the search. `oes_history` is the list of result[1] values appended once
# per generation; `result` is the last tuple returned by solver.result().
# This performs MAX_ITERATION * popsize full rollouts, so it is long-running.
oes_history, result = test_solver(oes)

  0%|          | 0/50 [00:00<?, ?it/s]

Track generation: 1270..1599 -> 329-tiles track


In [None]:
# `result` is the tuple returned by solver.result(); its first element is the
# best solution (the parameter vector), so load that rather than the whole tuple.
controller.set_weights(result[0])

In [None]:
# Persist the controller's weight matrix as a .npy file under ./weights.
np.save('./weights/C_weights.npy', controller.W)

In [None]:
# Close the environment now that the search has finished.
env.close()

In [None]:
import matplotlib.pyplot as plt
from IPython import display

In [None]:
def show_state(env, step=0, name="", info="", image=None):
    """Draw one frame inline in the notebook, replacing the previous output.

    If ``image`` is given it is displayed directly; otherwise the frame is
    taken from ``env.render(mode="rgb_array")[0]``. The figure title is built
    from ``name``, ``step`` and ``info``.
    """
    plt.figure(10)
    plt.clf()
    # Prefer the caller-supplied frame; only touch the environment when none is given.
    frame = image if image is not None else env.render(mode="rgb_array")[0]
    plt.imshow(frame)
    title = "{} | Step: {} {}".format(name, step, info)
    plt.title(title)
    plt.axis('off')
    # Clear the previous cell output before showing the refreshed figure.
    display.clear_output(wait=True)
    display.display(plt.gcf())

In [None]:
# Replay captured frames one at a time, showing element 0 of each entry.
# NOTE(review): `ims` is not assigned in any visible cell — the frame capture
# inside rollout() is commented out — so this raises NameError on a fresh
# kernel unless `ims` is produced elsewhere.
for i in ims:
    show_state(None, image=i[0])

In [None]:
# Save the captured frames to AICAR.npy.
# NOTE(review): `ims` is not assigned in any visible cell; confirm where it is produced.
np.save("AICAR.npy", ims)