In [1]:
import csv
import json
import os
import re
import time

import gym
import roboschool

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from gym import wrappers
from ipywidgets import Video
import ipywidgets as widgets
from multiprocessing import Pool, Process
from IPython.display import display

In [2]:
# %load load-model.py
def load_model(model_path):
    """
    Load a Keras model from ``model_path``, registering the custom classes it may reference.

    The three helper classes defined below are passed to ``tf.keras.models.load_model``
    via ``custom_objects``, keyed by their own class names.

    Args:
        model_path: Path handed unchanged to ``tf.keras.models.load_model``.

    Returns:
        The loaded model, or ``None`` when loading raises ``OSError`` (the error is printed).
    """
    import tensorflow as tf

    class Normc_initializer(tf.keras.initializers.Initializer):
        """Initializer drawing Gaussian values and rescaling them so the L2 norm over axis 0 equals ``std``."""
        def __init__(self, std=1.0):
            self.std=std

        def __call__(self, shape, dtype=None, partition_info=None):
            # dtype and partition_info are accepted but not used; values are always float32.
            out = np.random.randn(*shape).astype(np.float32)
            # Divide by the norm computed over axis 0, then scale by std.
            out *= self.std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
            return tf.constant(out)

    class ObservationNormalizationLayer(tf.keras.layers.Layer):
        """Layer that standardizes its input with a fixed mean/std and clips the result to [-5, 5]."""
        def __init__(self, ob_mean, ob_std, **kwargs):
            self.ob_mean = ob_mean
            self.ob_std = ob_std
            super(ObservationNormalizationLayer, self).__init__(**kwargs)

        def call(self, x):
            return tf.clip_by_value((x - self.ob_mean) / self.ob_std, -5.0, 5.0)

        # get_config and from_config need to be implemented to be able to serialize the model
        def get_config(self):
            base_config = super(ObservationNormalizationLayer, self).get_config()
            base_config['ob_mean'] = self.ob_mean
            base_config['ob_std'] = self.ob_std
            return base_config

        @classmethod
        def from_config(cls, config):
            # The config keys match the constructor arguments, so it can be splatted directly.
            return cls(**config)

    class DiscretizeActionsUniformLayer(tf.keras.layers.Layer):
        """Layer that picks the highest-scoring bin per action dimension and maps it linearly onto [alow, ahigh]."""
        def __init__(self, num_ac_bins, adim, ahigh, alow, **kwargs):
            self.num_ac_bins = num_ac_bins
            self.adim = adim
            # ahigh and alow are NumPy arrays when taken from the environment, but when the model is loaded from
            # an h5 file they arrive as plain lists, on which operations like subtraction do not work. Therefore
            # cast them explicitly.
            self.ahigh = np.array(ahigh)
            self.alow = np.array(alow)
            super(DiscretizeActionsUniformLayer, self).__init__(**kwargs)

        def call(self, x):
            # Reshape to [n x i x j] where n is dynamically chosen, i equals the action dimension and j equals
            # the number of bins
            scores_nab = tf.reshape(x, [-1, self.adim, self.num_ac_bins])
            # This picks the bin with the greatest value
            a = tf.argmax(scores_nab, 2)

            # Then map the bin index from [0, num_ac_bins - 1] linearly onto the interval [alow, ahigh]
            ac_range_1a = (self.ahigh - self.alow)[None, :]
            return 1. / (self.num_ac_bins - 1.) * tf.keras.backend.cast(a, 'float32') * ac_range_1a + self.alow[None, :]

        # get_config and from_config need to be implemented to be able to serialize the model
        def get_config(self):
            base_config = super(DiscretizeActionsUniformLayer, self).get_config()
            base_config['num_ac_bins'] = self.num_ac_bins
            base_config['adim'] = self.adim
            base_config['ahigh'] = self.ahigh
            base_config['alow'] = self.alow
            return base_config

        @classmethod
        def from_config(cls, config):
            # The config keys match the constructor arguments, so it can be splatted directly.
            return cls(**config)

    # Names under which the custom classes are made available to the loader.
    custom_objects = {'Normc_initializer' : Normc_initializer,
                      'ObservationNormalizationLayer' : ObservationNormalizationLayer,
                      'DiscretizeActionsUniformLayer' : DiscretizeActionsUniformLayer}

    try:
        model = tf.keras.models.load_model(model_path, custom_objects=custom_objects)
    except OSError as e:
        # Best effort: report the problem and signal failure with None instead of raising.
        print(e)
        return None
    return model

In [113]:
def rollout_evaluation(env, model, render=False, timestep_limit=None, random_stream=None):
    """
    Run a single episode of ``env`` with ``model`` as the policy and record it.

    At every step the current observation is passed to ``model.predict_on_batch`` as a
    batch of size one, and the first row of the prediction is used as the action for
    ``env.step``. All observations and predictions seen during the episode are written
    to ``x_test.npz`` and ``y_test.npz`` in the current working directory (nothing is
    written if no step was taken).

    Args:
        env: Environment exposing ``spec.tags``, ``reset()``, ``step(action)`` and ``render()``.
        model: Object exposing ``predict_on_batch(batch)``.
        render: If True, ``env.render()`` is called before every step.
        timestep_limit: Optional cap on the number of steps. The effective limit is the
            smaller of this value and the environment's
            ``wrapper_config.TimeLimit.max_episode_steps`` tag; a missing value on either
            side is ignored. If neither is set, the episode runs until ``done``.
        random_stream: Accepted for interface compatibility but currently unused; no
            action noise is added.

    Returns:
        Tuple ``(rewards, steps)``: a float32 array with the reward of every step and the
        number of steps taken.

    Raises:
        AssertionError: Propagated unchanged from ``env.step``; it is thrown for example
            when the action contains at least one NaN entry.
    """
    env_timestep_limit = env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps')
    # Either limit may be absent; only compare the ones that are actually set.
    limits = [limit for limit in (timestep_limit, env_timestep_limit) if limit is not None]
    max_steps = min(limits) if limits else None

    rews = []
    obs = []
    predictions = []
    ob = env.reset()
    while max_steps is None or len(rews) < max_steps:
        if render:
            env.render()
        batch = ob[None]  # add a leading batch axis of size one
        obs.append(batch)
        pred = model.predict_on_batch(batch)
        predictions.append(pred)
        ob, rew, done, _ = env.step(pred[0])
        rews.append(rew)

        if done:
            break

    # np.concatenate rejects an empty list, so skip the dump for a zero-step rollout.
    if obs:
        np.savez_compressed('x_test', np.concatenate(obs))
        np.savez_compressed('y_test', np.concatenate(predictions))
    return np.array(rews, dtype=np.float32), len(rews)


def run_model(model_file_path, model_file, save_directory, record=False, env_id="RoboschoolAnt-v1"):
    """
    Load a saved model, run it for one episode in a Gym environment and report the result.

    Args:
        model_file_path: Directory containing the model file.
        model_file: File name of the model inside ``model_file_path``.
        save_directory: Directory handed to ``wrappers.Monitor`` when ``record`` is True.
        record: If True, the environment is wrapped in ``wrappers.Monitor`` with
            ``force=True``.
        env_id: Id passed to ``gym.make``. Defaults to the environment this function
            was previously hard-coded to.

    Returns:
        ``[total_reward, episode_length]`` on success, or ``None`` when the model could
        not be loaded or the rollout raised ``AssertionError``.
    """
    env = gym.make(env_id)
    try:
        # NOTE(review): this reset happens before the optional Monitor wrap, and
        # rollout_evaluation resets again; presumably redundant, confirm before removing.
        env.reset()
        if record:
            env = wrappers.Monitor(env, save_directory, force=True)

        model = load_model(os.path.join(model_file_path, model_file))
        if model is None:
            # load_model already printed the underlying error.
            print("The model file could not be loaded. Stopping.")
            return None

        try:
            rewards, length = rollout_evaluation(env, model)
        except AssertionError:
            print("The model file provided produces non finite numbers. Stopping.")
            return None
    finally:
        # Always release the environment (and the Monitor wrapper, if any), including on
        # the early-return paths above.
        env.close()

    print(rewards)
    print([rewards.sum(), length])

    return [rewards.sum(), length]

In [123]:
# Directory and file name of the trained model evaluated in this cell.
model_file_path = "test-dir/discretize-nocustominit/"
model_file_name = "ant-discretize-nocustominit.h5"
# Any recorded video is stored next to the model file.
save_directory = model_file_path

# Alternative kept for reference: run the evaluation with recording in a worker process.
#with Pool(os.cpu_count()) as pool:
#    pool.apply(func=run_model, args=(model_file_path, model_file_name, save_directory, True))

# Evaluate in the notebook process without recording; the return value is the cell output.
run_model(model_file_path, model_file_name, save_directory, record=False)


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
[-0.05414261 -0.25568765 -0.17167957 -0.3685424  -0.39090848 -0.24605495
 -0.5896726  -0.4181856  -0.5840454  -0.658848   -0.20318423 -0.474702
 -0.16052487 -0.1711373  -0.20825009  0.02944881  0.03721193 -0.03396656
  0.01511653  0.4647759   0.5087224   0.27114466  0.21178856  0.28400737
  0.28174412  0.30179462  0.31372187  0.33046132  0.3370098   0.34972358
  0.33182478  0.35294655  0.7118012   0.7038064   0.52433914  0.51242924
  0.50843257  0.4849931   0.5330312   0.25844255  0.2706929   0.20888785
  0.40269232  0.36875576  0.10475568  0.083822    0.15085876  0.09104902
  0.04857123 -0.12771899 -0.16501473 -0.05846804 -0.04086783  0.00994585
  0.039709    0.26859632  0.28519943  0.28774866  0.28790796  0.4408212
  0.36581513  0.28692162 -0.18640164 -0.35732394 -0.17096432  0.22169237
  0.11510492  0.09187966  0.27583343  0.3668176   0.3660124   0.6856745
  0.700557

[-447.79733, 1000]

In [None]:
# Shell escape: long listing (including hidden entries) of test-dir/test-custom-objects.
!ls -la test-dir/test-custom-objects

In [None]:
#!sleep 2
# Show the recording found in save_directory. Reset video_file first so that a stale value
# from an earlier run cannot be displayed, and iterate in sorted order so the choice
# (the last .mp4 by name) does not depend on the arbitrary order of os.listdir.
video_file = None
for file in sorted(os.listdir(save_directory)):
    if file.endswith('.mp4'):
        video_file = os.path.join(save_directory, file)
        print(file)

if video_file is None:
    # Nothing was recorded, e.g. because run_model was called with record=False.
    print("No .mp4 recording found in {}".format(save_directory))
else:
    video = Video.from_file(video_file)
    display(video)

In [49]:
# Open the x_test.npz archive located in the model directory.
test_data = np.load(os.path.join(model_file_path, 'x_test.npz'))

In [104]:
# Open x_test.npz from the current working directory, which is where rollout_evaluation
# writes it. This rebinds test_data.
test_data = np.load('x_test.npz')

In [105]:
# Read the first positionally-saved array ('arr_0') from the archive.
# The recorded output shows this raising ValueError because the archive held an object
# array and np.load was called with allow_pickle=False.
test_data['arr_0']

ValueError: Object arrays cannot be loaded when allow_pickle=False

In [106]:
# Two independent length-10 standard-normal vectors, collected in a plain Python list.
a = np.random.randn(10)
b = np.random.randn(10)
c = [a, b]

# Saving is disabled in this cell.
#np.savez_compressed('test', np.array(c))


In [108]:
# Convert the list of two length-10 vectors into a single 2 x 10 array and display it.
np.array(c)

array([[-0.48760796, -0.03137171,  0.10772476,  0.76232787,  0.57247626,
         0.54990346,  1.0025784 ,  0.20682156,  0.69951539, -0.51341809],
       [ 0.54140483, -1.064237  ,  1.74136111,  1.17694124,  0.54364525,
        -1.43829813,  1.14558571, -0.46005004,  0.4309901 ,  0.07608661]])

In [115]:
# Open the x_test.npz archive stored under test-dir/.
test = np.load('test-dir/x_test.npz')

In [121]:
# Take the entry at index 2 along the first axis of the stored array.
# presumably one recorded observation per index; verify against how the file was written
solo = test['arr_0'][2]


In [53]:
class RunningStat(object):
    """
    Accumulator for a sum, a sum of squares and a sample count.

    ``mean`` and ``std`` are derived from the accumulators every time they are read.
    """

    def __init__(self, shape, eps):
        """Create float32 accumulators of ``shape``; sumsq and count both start at ``eps``."""
        self.sum = np.zeros(shape, np.float32)
        self.sumsq = np.full(shape, eps, np.float32)
        self.count = eps

    def increment(self, s, ssq, c):
        """Add a batch summary: sum ``s``, sum of squares ``ssq`` and sample count ``c``."""
        self.sum += s
        self.sumsq += ssq
        self.count += c

    @property
    def mean(self):
        """Accumulated sum divided by the accumulated count."""
        return self.sum / self.count

    @property
    def std(self):
        """Square root of E[x^2] - E[x]^2, with the variance floored at 1e-2."""
        mean_of_squares = self.sumsq / self.count
        variance = mean_of_squares - np.square(self.mean)
        # The floor keeps the result at 0.1 or above.
        floored = np.maximum(variance, 1e-2)
        return np.sqrt(floored)

ValueError: Object arrays cannot be loaded when allow_pickle=False

In [44]:
# Two independent length-10 standard-normal vectors, collected in a plain Python list.
a = np.random.randn(10)
b = np.random.randn(10)
c = [a, b]

# Convert the list into one 2-D array and write it, compressed, to test.npz.
np.savez_compressed('test', np.array(c))


In [45]:
# Re-open the test.npz archive from the current working directory.
test = np.load('test.npz')

In [46]:
# Display the first positionally-saved array ('arr_0') from the archive.
test['arr_0']

array([[-0.75126079,  0.85180292, -0.0060419 , -0.18615067, -1.75643322,
        -1.44359063, -0.5915373 , -0.27241683,  0.83457284,  1.58251457],
       [ 0.88645348,  2.17334851, -0.40356089, -0.92314391, -0.75248295,
        -0.45417969,  1.8393929 , -0.54930343, -2.35341329,  1.7582542 ]])

In [7]:
class RunningStat(object):
    """
    Keeps running totals (sum, sum of squares, count) for a stream of values.

    The ``mean`` and ``std`` properties are recomputed from the totals on each access.
    """

    def __init__(self, shape, eps):
        """Allocate float32 totals of ``shape``; ``eps`` seeds both sumsq and count."""
        self.sum = np.zeros(shape, np.float32)
        self.sumsq = np.full(shape, eps, np.float32)
        self.count = eps

    def increment(self, s, ssq, c):
        """Add ``s`` to the sum, ``ssq`` to the sum of squares and ``c`` to the count."""
        self.sum += s
        self.sumsq += ssq
        self.count += c

    @property
    def mean(self):
        """Running sum divided by the running count."""
        return self.sum / self.count

    @property
    def std(self):
        """Standard deviation from the running totals; the variance is never below 1e-2."""
        squared_mean = np.square(self.mean)
        raw_variance = self.sumsq / self.count - squared_mean
        return np.sqrt(np.maximum(raw_variance, 1e-2))

In [11]:
# Fresh accumulator for 28-element statistics, seeded with eps=1e-5.
rs = RunningStat(shape=(28, ), eps=1e-5)

In [13]:
# Overwrite the accumulators with values stored in test_data.
# NOTE(review): this expects test_data to hold the keys '_sum', 'sumsq' and 'count'; the
# cells visible here only load x_test.npz into test_data and read 'arr_0' from it, so
# confirm which archive is meant to be loaded before this cell runs.
rs.sum = test_data['_sum']
rs.sumsq = test_data['sumsq']
rs.count = test_data['count']

In [18]:
# Display the mean derived from the restored accumulators (sum / count).
rs.mean

array([-4.0026894e-01, -1.2531167e-01,  9.5411342e-01, -1.1759889e-03,
       -1.1659848e-03, -1.2082948e-02, -1.7148201e-01, -8.8134572e-02,
       -1.8175726e-01,  1.2976294e-03, -1.3821378e+00,  1.2262263e-03,
       -7.7275717e-01, -1.4502215e-03,  1.7050633e+00,  3.7816670e-04,
       -4.7281194e-02,  2.4792904e-03,  6.3160962e-01, -6.0218587e-03,
        2.1209793e-02,  1.7203779e-03, -9.0054941e-01,  3.8272135e-03,
        9.7128230e-01,  9.7128230e-01,  9.7177738e-01,  9.7078729e-01],
      dtype=float32)

In [19]:
# Display the standard deviation derived from the restored accumulators; entries equal to
# 0.1 are the ones that hit the 1e-2 variance floor inside RunningStat.std.
rs.std

array([0.1       , 0.26949415, 0.1       , 0.1       , 0.1       ,
       0.1       , 0.2954735 , 0.11102042, 0.8113045 , 0.1       ,
       1.0485069 , 0.1       , 0.18097788, 0.1       , 0.7350216 ,
       0.1       , 0.79907376, 0.1       , 1.2506089 , 0.1       ,
       0.97364384, 0.1       , 0.41828343, 0.1       , 0.1670268 ,
       0.1670268 , 0.16562328, 0.16841705], dtype=float32)