In [1]:
import csv
import json
import os
import re
import time

import gym
import roboschool

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from gym import wrappers
from ipywidgets import Video
import ipywidgets as widgets
from multiprocessing import Pool, Process
from IPython.display import display

In [2]:
#%%javascript
#IPython.OutputArea.auto_scroll_threshold = 9999;

## Load a Model

The model uses a custom initializer and custom layers, and these are serialized when the model is saved, so we have to pass them on when we load the model again. Unfortunately, because of the issue with the background TensorFlow session when TensorFlow is imported together with multiprocessing, we cannot define the initializer once and reuse it here. So we define it twice: once inside the `create_model()` function and once here.

In [3]:
def load_model(model_path):
    """
    Load a saved Keras model that references the custom initializer and layers defined below.

    TensorFlow is imported inside the function, so the import only happens in the process
    that actually calls it. The custom classes depend on `tf` and are therefore defined here
    as well, then handed to Keras through `custom_objects` so it can rebuild them by name.

    :param model_path: Path of the saved model, passed unchanged to tf.keras.models.load_model
    :return: Whatever tf.keras.models.load_model returns for that path
    """
    import tensorflow as tf

    class Normc_initializer(tf.keras.initializers.Initializer):
        """
        Create a TensorFlow constant with random normal numbers in the given shape, rescaled so
        that the norm taken along axis 0 equals `std`.
        :param std: Target norm along axis 0
        """
        def __init__(self, std=1.0):
            self.std = std

        def __call__(self, shape, dtype=None, partition_info=None):
            # dtype and partition_info are accepted but not used; the result is always float32
            out = np.random.randn(*shape).astype(np.float32)
            out *= self.std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
            return tf.constant(out)

        def get_config(self):
            # Expose std so it is written out again if the loaded model gets re-saved
            return {'std': self.std}

    class ObservationNormalizationLayer(tf.keras.layers.Layer):
        """
        Shift and scale the input with a fixed mean and standard deviation and clip the
        result to [-5, 5].
        :param ob_mean: Value subtracted from the input
        :param ob_std: Value the shifted input is divided by
        """
        def __init__(self, ob_mean, ob_std, **kwargs):
            self.ob_mean = ob_mean
            self.ob_std = ob_std
            super(ObservationNormalizationLayer, self).__init__(**kwargs)

        def call(self, inputs):
            return tf.clip_by_value((inputs - self.ob_mean) / self.ob_std, -5.0, 5.0)

        # get_config and from_config need to be implemented to be able to serialize the model
        def get_config(self):
            base_config = super(ObservationNormalizationLayer, self).get_config()
            base_config['ob_mean'] = self.ob_mean
            base_config['ob_std'] = self.ob_std
            return base_config

        @classmethod
        def from_config(cls, config):
            return cls(**config)

    class DiscretizeActionsUniformLayer(tf.keras.layers.Layer):
        """
        Turn per-bin scores into actions: for every action dimension the highest scoring bin
        is picked and its index is mapped linearly onto [alow, ahigh].
        :param num_ac_bins: Number of bins per action dimension
        :param adim: Number of action dimensions
        :param ahigh: Upper action bound per dimension
        :param alow: Lower action bound per dimension
        """
        def __init__(self, num_ac_bins, adim, ahigh, alow, **kwargs):
            self.num_ac_bins = num_ac_bins
            self.adim = adim
            self.ahigh = ahigh
            self.alow = alow
            super(DiscretizeActionsUniformLayer, self).__init__(**kwargs)

        def call(self, x):
            # Reshape to [n x i x j] where n is dynamically chosen, i equals action dimension and j equals the number
            # of bins
            scores_nab = tf.reshape(x, [-1, self.adim, self.num_ac_bins])
            # This picks the bin with the greatest value
            a = tf.argmax(scores_nab, 2)

            # The bounds come from get_config and pass through JSON when the model is saved, so after
            # loading they may be plain lists instead of numpy arrays. Coerce them here, otherwise
            # the subtraction and the [None, :] indexing below fail on lists.
            alow = np.asarray(self.alow, dtype=np.float32)
            ahigh = np.asarray(self.ahigh, dtype=np.float32)

            # Then transform the interval from [0, num_ac_bins - 1] to [alow, ahigh]
            ac_range_1a = (ahigh - alow)[None, :]
            return 1. / (self.num_ac_bins - 1.) * tf.keras.backend.cast(a, 'float32') * ac_range_1a + alow[None, :]

        # get_config and from_config need to be implemented to be able to serialize the model
        def get_config(self):
            base_config = super(DiscretizeActionsUniformLayer, self).get_config()
            base_config['num_ac_bins'] = self.num_ac_bins
            base_config['adim'] = self.adim
            base_config['ahigh'] = self.ahigh
            base_config['alow'] = self.alow
            return base_config

        @classmethod
        def from_config(cls, config):
            return cls(**config)

    # Keras looks the classes up by these names when it rebuilds the model
    custom_objects = {'Normc_initializer' : Normc_initializer,
                      'ObservationNormalizationLayer' : ObservationNormalizationLayer,
                      'DiscretizeActionsUniformLayer' : DiscretizeActionsUniformLayer}

    return tf.keras.models.load_model(model_path, custom_objects=custom_objects)

def rollout_evaluation(env, model, render=False, timestep_limit=None, random_stream=None):
    """
    Run a single episode in `env`, choosing every action with `model`, and collect the rewards.

    The episode stops when the environment reports `done` or when the step limit is reached.
    The step limit is the smaller of `timestep_limit` and the limit found on the environment
    spec; if only one of the two is available, that one is used.

    :param env: Environment providing reset(), step(action) and, when `render` is set, render()
    :param model: Object whose predict_on_batch() maps a batch of observations to a batch of actions
    :param render: If True, env.render() is called before every step
    :param timestep_limit: Optional upper bound on the number of steps
    :param random_stream: Currently ignored; no action noise is added regardless of its value
    :return: Tuple of (float32 array with the reward of every step, number of steps taken)
    :raises ValueError: If neither `timestep_limit` nor the environment spec provides a step limit
    """
    # Look for the limit in the spec tags first and fall back to a max_episode_steps attribute.
    # Everything is read defensively because the spec or either field may be missing.
    spec = getattr(env, 'spec', None)
    tags = getattr(spec, 'tags', None) or {}
    env_timestep_limit = tags.get('wrapper_config.TimeLimit.max_episode_steps')
    if env_timestep_limit is None:
        env_timestep_limit = getattr(spec, 'max_episode_steps', None)

    limits = [limit for limit in (timestep_limit, env_timestep_limit) if limit is not None]
    if not limits:
        raise ValueError("No step limit available: pass timestep_limit or use an environment "
                         "whose spec defines a maximum number of episode steps")
    timestep_limit = min(limits)

    rews = []
    ob = env.reset()
    for _ in range(timestep_limit):
        if render:
            env.render()
        # The model works on batches: add a leading batch axis and take the single result back out
        ac = model.predict_on_batch(ob[None])[0]
        ob, rew, done, _info = env.step(ac)
        rews.append(rew)

        if done:
            break
    # One reward is recorded per step, so the list length is the step count
    return np.array(rews, dtype=np.float32), len(rews)


def run_model(model_file_path, model_file, save_directory, record=False):
    """
    Evaluate one saved model for a single episode and report the result.

    The environment id is read from `config.json` inside `model_file_path`. The per-step
    rewards and the [total reward, episode length] summary are printed.

    :param model_file_path: Directory containing config.json and the saved model file
    :param model_file: File name of the saved model inside `model_file_path`
    :param save_directory: Directory handed to the Monitor wrapper when `record` is True
    :param record: If True, the environment is wrapped in wrappers.Monitor (force=True)
    :return: List [sum of all rewards, number of steps taken]
    """
    with open(os.path.join(model_file_path, "config.json"), encoding='utf-8') as f:
        config = json.load(f)

    env = gym.make(config['config']['env_id'])
    try:
        env.reset()

        if record:
            env = wrappers.Monitor(env, save_directory, force=True)

        model = load_model(os.path.join(model_file_path, model_file))

        rewards, length = rollout_evaluation(env, model)
    finally:
        # Close explicitly instead of relying on garbage collection, so the environment
        # (and the Monitor wrapper, if any) is released even when the rollout fails
        env.close()

    total_reward = rewards.sum()

    print(rewards)
    print([total_reward, length])

    return [total_reward, length]

In [4]:
# Directory of the training run to evaluate and the directory the recording is written to
model_file_path = "/tmp/es_2723/14h_35m_14s"
save_directory = "/home/jovyan/work/evolution-strategies"


# The evaluation runs in a separate process so TensorFlow is only imported there.
# pool.apply blocks on a single call, so one worker is enough; there is no need to
# start one process per CPU core.
with Pool(processes=1) as pool:
    pool.apply(func=run_model, args=(model_file_path, "snapshot_01244.h5", save_directory, True))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
[-2.0050037  -1.9014888  -2.7920554  -3.4675457  -2.3709848  -1.8773048
 -1.772735   -1.2176484  -1.5918169  -1.4683728  -1.0826861  -2.4670074
 -2.8612452  -2.5890965  -1.6582769  -0.9889073  -1.5963849  -4.9361153
 -2.3565295  -2.8825014  -0.91857725 -1.988986   -0.9505365  -2.462678
 -1.0129569  -1.5374442  -2.0309856  -0.98935527 -0.9287383  -1.1476717
 -0.9792823  -0.8368079  -1.7926075  -0.433907   -1.2073644  -0.7464631
 -0.9094345  -0.7549313  -1.6941236  -0.6013869  -1.6459267  -1.634552
 -0.39750677 -2.3452377  -0.91469246 -0.27223393 -0.26072693 -1.2069854
 -0.47188237 -0.07824723 -0.70381075 -1.767184   -1.1963866  -0.7214007
 -2.5544195  -2.4491284  -1.894066   -0.70447135 -0.82842094 -2.3206706
 -0.6508348  -2.0189588  -1.6468253  -0.16195937 -0.9955688  -1.4073511
 -0.14046337 -0.37489876 -0.26046574 -0.12678288 -0.22753029 -0.36541733
 -1.5296091  -0.124

  0.92752314  0.92752165  0.92752045  0.9275195 ]
[753.4099, 1000]


In [5]:
# Gather the recordings in sorted order so the displayed file does not depend on
# the arbitrary order os.listdir returns
video_files = sorted(name for name in os.listdir(save_directory) if name.endswith('.mp4'))
if not video_files:
    # Fail with a clear message instead of a NameError on video_file further down
    raise FileNotFoundError("No .mp4 recording found in {}".format(save_directory))

for file in video_files:
    print(file)

# As before, the last listed recording is the one that gets displayed
video_file = os.path.join(save_directory, video_files[-1])
video = Video.from_file(video_file)
display(video)

openaigym.video.0.24331.video000000.mp4


Video(value=b'\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00\x00\x00\x08free\x00\x03\x15gmdat\x00\x…