In [None]:
import csv

## Load a Model

The model uses a custom initializer, and that initializer is serialized when the model is saved, so we have to pass it in again when we load the model. Unfortunately, because importing TensorFlow starts a background session that causes problems with multiprocessing, we cannot define the initializer once and reuse it here. Instead it is defined twice: once inside the `create_model()` function and once here.

In [None]:
def load_model(model_path):
    """
    Load a saved Keras model that uses the custom initializer and the custom
    observation-normalization layer.

    Both custom classes are declared inside this function and handed to
    `tf.keras.models.load_model` through its `custom_objects` argument.

    :param model_path: path of the saved model, passed on to `tf.keras.models.load_model`
    :return: whatever `tf.keras.models.load_model` returns for `model_path`
    """
    # NumPy and TensorFlow are imported inside the function rather than at module level.
    import numpy as np
    import tensorflow as tf

    class Normc_initializer(tf.keras.initializers.Initializer):
        """
        Create a TensorFlow constant with random numbers normed in the given shape.

        The random values are rescaled so that, for every position along the remaining
        axes, the L2 norm taken over axis 0 equals `std`.

        :param std: target norm of the generated values along axis 0
        """
        def __init__(self, std=1.0):
            self.std = std

        def __call__(self, shape, dtype=None, partition_info=None):
            # `dtype` and `partition_info` are accepted but not used: the result is always float32.
            out = np.random.randn(*shape).astype(np.float32)
            out *= self.std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
            return tf.constant(out)

        def get_config(self):
            # Expose `std` so it is kept when a model using this initializer is saved again;
            # `__init__` accepts it back as a keyword argument.
            return {'std': self.std}

    class ObservationNormalizationLayer(tf.keras.layers.Layer):
        """
        Layer that standardizes its input with a fixed mean and standard deviation and
        clips the result to the range [-5, 5].

        :param ob_mean: value subtracted from the input
        :param ob_std: value the centered input is divided by
        """
        def __init__(self, ob_mean, ob_std, **kwargs):
            self.ob_mean = ob_mean
            self.ob_std = ob_std
            super(ObservationNormalizationLayer, self).__init__(**kwargs)

        def call(self, inputs):
            normalized = (inputs - self.ob_mean) / self.ob_std
            return tf.clip_by_value(normalized, -5.0, 5.0)

        def get_config(self):
            # Extend the base layer config with the two constructor arguments so that
            # `from_config` can rebuild the layer.
            base_config = super(ObservationNormalizationLayer, self).get_config()
            base_config['ob_mean'] = self.ob_mean
            base_config['ob_std'] = self.ob_std
            return base_config

        @classmethod
        def from_config(cls, config):
            return cls(**config)

    # NOTE(review): the initializer is registered as an instance while the layer is
    # registered as a class -- confirm this asymmetry is intended.
    custom_objects = {'Normc_initializer' : Normc_initializer(std=1.0),
                      'ObservationNormalizationLayer' : ObservationNormalizationLayer}

    return tf.keras.models.load_model(model_path, custom_objects=custom_objects)

# Visualize

In [None]:
import gym, roboschool
import PIL.Image
import time
from io import BytesIO
from IPython import display


def showarray(a, fmt='jpeg'):
    """
    Display the image array `a` in the notebook output.

    The array is converted with `PIL.Image.fromarray`, encoded into an in-memory
    buffer using the format `fmt`, and the encoded bytes are shown via IPython's display.

    :param a: array accepted by `PIL.Image.fromarray`
    :param fmt: image format name handed to PIL's `save`
    """
    buffer = BytesIO()
    image = PIL.Image.fromarray(a)
    image.save(buffer, fmt)
    encoded = buffer.getvalue()
    display.display(display.Image(data=encoded))

def rollout(env, model, render=False, timestep_limit=None, save_obs=False, random_stream=None):
    """
    Run one episode in `env`, choosing every action with `act(ob[None], model, ...)`.

    The episode stops when the environment reports `done` or when the step limit is
    reached. The step limit is the smaller of `timestep_limit` and the environment's
    'wrapper_config.TimeLimit.max_episode_steps' tag; if only one of the two is
    available, that one is used.

    If random_stream is provided, the rollout will take noisy actions with noise drawn from that stream.
    Otherwise, no action noise will be added.

    :param env: environment providing `spec.tags`, `reset()`, `step(action)` and `render(mode=...)`
    :param model: model handed to `act` to compute actions
    :param render: if True, show an 'rgb_array' frame before every step and clear the previous output
    :param timestep_limit: optional upper bound on the number of steps
    :param save_obs: if True, also return the observations the actions were computed from
    :param random_stream: optional noise source forwarded to `act`
    :return: `(rews, t)` or, with `save_obs`, `(rews, t, obs)`; `rews` is a float32 array of
             per-step rewards, `t` the number of steps taken, `obs` an array of observations
    :raises ValueError: if neither `timestep_limit` nor the environment provides a step limit
    """
    env_timestep_limit = env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps')

    # `tags.get` returns None when the tag is absent; ignore whichever limit is missing
    # instead of letting `min`/`range` fail on None.
    known_limits = [limit for limit in (timestep_limit, env_timestep_limit) if limit is not None]
    if not known_limits:
        raise ValueError("No timestep limit available: pass timestep_limit or use an "
                         "environment that defines max_episode_steps.")
    timestep_limit = min(known_limits)

    rews = []
    obs = [] if save_obs else None
    ob = env.reset()
    for _ in range(timestep_limit):
        if render:
            showarray(env.render(mode='rgb_array'))
            # wait=True delays clearing until the next frame is ready to be shown.
            display.clear_output(wait=True)
        # `act` works on a batch: add a leading axis and take the single result back out.
        ac = act(ob[None], model, random_stream=random_stream)[0]
        if save_obs:
            obs.append(ob)
        ob, rew, done, _ = env.step(ac)
        rews.append(rew)

        if done:
            break

    t = len(rews)  # one reward is recorded per executed step
    rews = np.array(rews, dtype=np.float32)
    if save_obs:
        return rews, t, np.array(obs)
    return rews, t

def act(ob, model, random_stream=None):
    """
    Compute an action for `ob` with `model.predict`, optionally perturbed by noise.

    When `random_stream` is given and `model_structure.ac_noise_std` is non-zero, noise from
    `random_stream.randn` (same shape as the action), scaled by that value, is added
    in place to the prediction.

    NOTE(review): `model_structure` is read as a global and is not defined in the visible
    part of this notebook -- confirm where it comes from.

    :param ob: observation(s) handed to `model.predict`
    :param model: object providing `predict`
    :param random_stream: optional noise source providing `randn(*shape)`
    :return: the (possibly noisy) prediction
    """
    action = model.predict(ob)

    # Without a noise source the plain prediction is returned.
    if random_stream is None:
        return action

    # TODO why randomstream? Better generalization?
    noise_std = model_structure.ac_noise_std
    if noise_std != 0:
        action += random_stream.randn(*action.shape) * noise_std
    return action

In [None]:
def parse_log_to_csv(log_file, csv_file):
    """
    Convert a training log into a CSV file with one row per generation.

    The log is read line by line and blank lines are ignored. A line that contains the
    whitespace-separated token "Generation" starts a new record; the last token of that
    line and of every following non-blank line, up to the next "Generation" line, become
    the values of the record. Lines that appear before the first "Generation" line are
    ignored. Only records with exactly 18 values are written; all others are skipped.

    :param log_file: path of the log file to read
    :param csv_file: path of the CSV file to write (an existing file is overwritten)
    :return: None
    """
    header = ['Generation',
              'Reward Mean',
              'Reward Standard Deviation',
              'Length Mean',
              'Evaluation Reward Mean',
              'Evaluation Reward Standard Deviation',
              'Evaluation Length Mean',
              'Evaluation Count',
              'Episodes this generation',
              'Episodes overall',
              'Timesteps this generation',
              'Timesteps overall',
              'Unique Workers',
              'Observation count',
              'Time elapsed this generation (s)',
              'Time elapsed overall (s)']

    # NOTE(review): the header names 16 columns while a record must hold 18 values to be
    # written -- confirm which of the two matches the log format.
    values_per_generation = 18

    groups = []
    # Values of the record currently being collected. It stays None until the first
    # "Generation" line so that preamble lines cannot end up in `groups`.
    current = None
    with open(log_file) as log:
        for line in log:
            tokens = line.split()

            if not tokens:
                continue

            if "Generation" in tokens:
                current = [tokens[-1]]
                groups.append(current)
            elif current is not None:
                current.append(tokens[-1])

    # newline='' is what the csv module asks for when opening files for a writer; the
    # context manager makes sure the file is flushed and closed.
    with open(csv_file, 'w', newline='') as out:
        writer = csv.writer(out)
        writer.writerow(header)
        writer.writerows(generation for generation in groups
                         if len(generation) == values_per_generation)


In [None]:
import os
import tensorflow as tf

# Directory (below the current working directory) that holds the saved models.
save_directory = os.getcwd() + "/es_9655/"

# Collect every saved model (*.h5) in that directory.
# NOTE(review): the order is lexicographic, so e.g. "10" sorts before "2" unless the
# numbers in the file names are zero-padded -- confirm the naming scheme.
file_paths = sorted(save_directory + file_name
                    for file_name in os.listdir(save_directory)
                    if file_name.endswith(".h5"))
if not file_paths:
    raise FileNotFoundError("No .h5 model files found in " + save_directory)

trained_models = [load_model(file_path) for file_path in file_paths]

# Show which file the model below was loaded from (a bare expression in the middle of
# a cell would not be displayed).
print(file_paths[-1])

model = trained_models[-1]
model.summary()


In [None]:
import numpy as np
from gym import wrappers

# Create the environment and wrap it in a Monitor that writes into the model directory;
# force=True lets the Monitor reuse a directory that already contains monitor output.
env = gym.make("RoboschoolAnt-v1")
env = wrappers.Monitor(env, save_directory, force=True)

# Run one rendered episode with the last loaded model. `rollout` is the episode function
# defined in this notebook; the previously used name `rollout_rend` is not defined here.
rollout(env, trained_models[-1], render=True)

Test displaying an environment multiple times within one "kernel session".

In [None]:
from multiprocessing import Process
# Run a rendered rollout in a separate process and wait for it to finish. The positional
# arguments map to rollout(env, model, render). `rollout` is the episode function defined
# in this notebook; the previously used name `rollout_rend` is not defined here.
p = Process(target=rollout, args=(env, trained_models[-1], True))
p.start()
p.join()