In [None]:
import csv
import json
import os
import re
import time

import gym
import roboschool

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from gym import wrappers
from ipywidgets import Video
from multiprocessing import Pool, Process


## Load a Model

Because the model uses a custom initializer, and this initializer is serialized when the model is saved, we have to pass it in again when we load the model. Unfortunately, because of the issue with the background TensorFlow session that is started when TensorFlow is imported together with multiprocessing, we cannot define the initializer once and reuse it here. So we define it twice: once inside the `create_model()` function and once here.

In [None]:
def load_model(model_path):
    """
    Load a saved Keras model together with the custom classes it references.

    TensorFlow is imported inside the function and the custom initializer and layers are
    declared locally instead of at module level; the notebook text attributes this to an
    issue with TensorFlow's background session when combined with multiprocessing.

    :param model_path: path handed unchanged to tf.keras.models.load_model
    :return: the object returned by tf.keras.models.load_model
    """
    import tensorflow as tf

    class Normc_initializer(tf.keras.initializers.Initializer):
        """
        Initializer that draws standard-normal values and rescales them so that the
        L2 norm taken along axis 0 equals `std`.

        :param std: target norm along axis 0 (default 1.0)
        """
        def __init__(self, std=1.0):
            self.std = std

        def __call__(self, shape, dtype=None, partition_info=None):
            # `dtype` and `partition_info` are accepted but not used; the result is always float32.
            out = np.random.randn(*shape).astype(np.float32)
            # Divide by the norm along axis 0 (kept as a broadcastable row), then scale to `std`.
            out *= self.std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
            return tf.constant(out)

    class ObservationNormalizationLayer(tf.keras.layers.Layer):
        """
        Layer that standardizes its input with a fixed mean/std and clips the result to [-5, 5].
        """
        def __init__(self, ob_mean, ob_std, **kwargs):
            self.ob_mean = ob_mean
            self.ob_std = ob_std
            super(ObservationNormalizationLayer, self).__init__(**kwargs)

        def call(self, input):
            return tf.clip_by_value((input - self.ob_mean) / self.ob_std, -5.0, 5.0)

        def get_config(self):
            # Extend the base layer config with the constructor arguments so that
            # from_config() can rebuild the layer.
            base_config = super(ObservationNormalizationLayer, self).get_config()
            base_config['ob_mean'] = self.ob_mean
            base_config['ob_std'] = self.ob_std
            return base_config

        @classmethod
        def from_config(cls, config):
            return cls(**config)

    class DiscretizeActionsUniformLayer(tf.keras.layers.Layer):
        """
        Layer that scores `num_ac_bins` uniformly spaced bins per action dimension, picks
        the best-scoring bin and maps its index to a value between `alow` and `ahigh`.
        """
        def __init__(self, num_ac_bins, adim, ahigh, alow, **kwargs):
            self.num_ac_bins = num_ac_bins
            self.adim = adim
            self.ahigh = ahigh
            self.alow = alow
            super(DiscretizeActionsUniformLayer, self).__init__(**kwargs)

        def call(self, x):
            # NOTE(review): a new Dense layer object is constructed each time call() runs;
            # confirm that its weights are the ones restored from the saved model.
            scores = tf.keras.layers.Dense(
                            self.adim * self.num_ac_bins,
                            kernel_initializer=Normc_initializer(std=0.01),
                            bias_initializer=tf.initializers.zeros())(x)

            # Reshape to [n x i x j], where n is inferred, i is the action dimension and j is the
            # number of bins.
            scores_nab = tf.reshape(scores, [-1, self.adim, self.num_ac_bins])
            # Pick, for every action dimension, the index of the bin with the greatest score.
            a = tf.argmax(scores_nab, 2)

            # Map the bin index from [0, num_ac_bins - 1] linearly onto [alow, ahigh]
            # (the original note describes this interval as [-1, 1]).
            ac_range_1a = (self.ahigh - self.alow)[None, :]
            return 1. / (self.num_ac_bins - 1.) * tf.keras.backend.cast(a, 'float32') * ac_range_1a + self.alow[None, :]

        # get_config and from_config need to be implemented to be able to serialize the model.
        def get_config(self):
            base_config = super(DiscretizeActionsUniformLayer, self).get_config()
            base_config['num_ac_bins'] = self.num_ac_bins
            base_config['adim'] = self.adim
            base_config['ahigh'] = self.ahigh
            base_config['alow'] = self.alow
            return base_config

        @classmethod
        def from_config(cls, config):
            return cls(**config)

    # Keras resolves the serialized class names through this mapping while loading.
    custom_objects = {'Normc_initializer' : Normc_initializer,
                      'ObservationNormalizationLayer' : ObservationNormalizationLayer,
                      'DiscretizeActionsUniformLayer' : DiscretizeActionsUniformLayer}

    return tf.keras.models.load_model(model_path, custom_objects=custom_objects)

def rollout(env, model, render=False, timestep_limit=None, random_stream=None):
    """
    Run a single episode of `env`, choosing every action with `model`.

    The episode stops when the environment reports `done` or when the step limit is
    reached. The limit is the smaller of `timestep_limit` and the limit registered in the
    environment spec; whichever of the two is missing is ignored, and if both are missing
    the episode runs until `done`.

    :param env: gym-style environment providing reset(), step(), render() and spec.tags
    :param model: model handed to act() to compute actions
    :param render: if True, env.render() is called before every step
    :param timestep_limit: optional upper bound on the number of steps
    :param random_stream: forwarded unchanged to act()
    :return: tuple (rewards, steps) with the per-step rewards as a float32 array and the
             number of steps taken
    """
    env_timestep_limit = env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps')

    # Either limit may be None (no limit registered / none requested); only compare real numbers.
    known_limits = [limit for limit in (timestep_limit, env_timestep_limit) if limit is not None]
    max_steps = min(known_limits) if known_limits else None

    rews = []
    ob = env.reset()
    while max_steps is None or len(rews) < max_steps:
        if render:
            env.render()
        # The model works on batches: add a leading batch axis and take the single result.
        ac = act(ob[None], model, random_stream=random_stream)[0]
        ob, rew, done, _ = env.step(ac)
        rews.append(rew)

        if done:
            break

    return np.array(rews, dtype=np.float32), len(rews)

def act(ob, model, random_stream=None):
    """
    Compute the model's prediction for a batch of observations.

    :param ob: batch of observations handed to model.predict
    :param model: object exposing a predict() method
    :param random_stream: accepted but currently unused, so no action noise is added
    :return: whatever model.predict(ob) returns
    """
    return model.predict(ob)

# Visualize

In [None]:
def parse_generation_number(model_file_path):
    """
    Extract the generation number from a snapshot file name.

    The number is the text between the last 'snapshot_' and the following '.h5'.

    :param model_file_path: file name or path of a model snapshot
    :return: the generation as an int, or None if that text is not an integer
    """
    after_prefix = model_file_path.rpartition('snapshot_')[2]
    candidate = after_prefix.partition('.h5')[0]

    try:
        return int(candidate)
    except ValueError:
        return None
    
def index_save_directory(save_directory):
    """
    Collect the artifacts of a training run from its save directory.

    :param save_directory: directory to scan (with or without a trailing slash)
    :return: tuple (model_file_paths, log_file_path, config_file) where
             model_file_paths is the list of '.h5' file names (not full paths), ordered so
             that embedded numbers compare numerically ('snapshot_2.h5' before 'snapshot_10.h5'),
             log_file_path is the path of the file ending in 'log.txt' or None, and
             config_file is the parsed JSON content of the file ending in 'config.json' or None.
             Returns (None, None, None) if save_directory is not a directory.
    """
    if not os.path.isdir(save_directory):
        return None, None, None

    model_file_paths, log_file_path, config_file = [], None, None

    for file_name in os.listdir(save_directory):
        # os.path.join works whether or not save_directory ends with a separator.
        full_path = os.path.join(save_directory, file_name)

        if file_name.endswith('.h5'):
            model_file_paths.append(file_name)
        elif file_name.endswith('config.json'):
            with open(full_path, 'r') as f:
                config_file = json.load(f)
        elif file_name.endswith('log.txt'):
            log_file_path = full_path

    def natural_key(name):
        # re.split with a capturing group alternates text (even index) and digit runs (odd index),
        # so keys are always compared str-with-str and int-with-int.
        parts = re.split(r'(\d+)', name)
        return [int(part) if index % 2 else part for index, part in enumerate(parts)], name

    # A plain lexicographic sort would put 'snapshot_10' before 'snapshot_2' when the
    # generation numbers are not zero-padded.
    model_file_paths.sort(key=natural_key)

    return model_file_paths, log_file_path, config_file

def run_model(env_id, save_directory, model_file_path, record=False):
    """
    Load a saved model and run it for one episode in a freshly created environment.

    :param env_id: gym environment id passed to gym.make
    :param save_directory: directory containing the model file
    :param model_file_path: file name of the model inside save_directory
    :param record: if True, wrap the environment in a gym Monitor writing to
                   '<save_directory>/videos/<model_file_path>/'
    :return: the video directory (with trailing separator) if record is True, otherwise
             the list [sum of rewards, episode length]
    """
    env = gym.make(env_id)
    video_directory = None

    try:
        # NOTE(review): rollout() resets the environment again; this initial reset looks
        # redundant but is kept from the original - confirm before removing.
        env.reset()

        if record:
            # The trailing '' keeps the trailing separator that callers of this function expect.
            video_directory = os.path.join(save_directory, 'videos', model_file_path, '')
            env = wrappers.Monitor(env, video_directory, force=True)

        model = load_model(os.path.join(save_directory, model_file_path))

        rewards, length = rollout(env, model)
    finally:
        # Always release the environment, including when loading or the rollout fails.
        env.close()

    if record:
        return video_directory

    return [rewards.sum(), length]

def visualize_model(save_directory, model_file_path, config_file):
    """
    Record one episode of a saved model and return the path of the resulting video.

    The rollout is executed through run_model() in a worker process rather than in the
    notebook kernel itself.

    :param save_directory: directory containing the model file
    :param model_file_path: file name of the model inside save_directory
    :param config_file: parsed run configuration; config_file['config']['env_id'] is used
    :return: path of an '.mp4' file in the video directory (the first one in sorted
             order), or None if there is none
    """
    # A single task only needs a single worker; there is no point in spawning one per CPU.
    with Pool(processes=1) as pool:
        video_directory = pool.apply(func=run_model, args=(config_file['config']['env_id'],
                                                           save_directory,
                                                           model_file_path,
                                                           True))

    # os.listdir returns entries in arbitrary order; sort so the chosen file is deterministic.
    for file_name in sorted(os.listdir(video_directory)):
        if file_name.endswith('.mp4'):
            return os.path.join(video_directory, file_name)

    return None

In [None]:
# Column names of the CSV written by parse_log_to_csv, in the order the values are written.
_LOG_CSV_COLUMNS = (
    'Generation',
    'Rew_Mean',
    'Rew_Std',
    'Len_Mean',
    'Eval_Rew_Mean',
    'Eval_Rew_Std',
    'Eval_Len_Mean',
    'Eval_Count',
    'Episodes_this_Gen',
    'Episodes_overall',
    'Timesteps_this_gen',
    'Timesteps_overall',
    'Unique_Workers',
    'ResultsSkippedFrac',
    'Observation_count',
    'Time_elapsed_this_Gen',
    'Time_elapsed_overall',
    'TimePerMutationMin',
    'TimePerMutationMax',
    'TimePerMutationMean',
    'TimePerMutationCount',
    'TimeCreateModelMin',
    'TimeCreateModelMax',
    'TimeCreateModelMean',
    'TimeCreateModelCount',
    'TimeSetFlatMin',
    'TimeSetFlatMax',
    'TimeSetFlatMean',
    'TimeSetFlatCount',
    'TimeSampleMin',
    'TimeSampleMax',
    'TimeSampleMean',
    'TimeSampleCount',
    'TimeGetNoiseMin',
    'TimeGetNoiseMax',
    'TimeGetNoiseMean',
    'TimeGetNoiseCount',
    'TimePredictMin',
    'TimePredictMax',
    'TimePredictMean',
    'TimePredictCount',
)


def parse_log_to_csv(log_file, csv_file):
    """
    Convert a training log into a CSV file with one row per complete generation.

    A generation starts at a line containing the token "Generation"; the last
    whitespace-separated token of that line and of every following non-empty line is
    collected as one value. Only generations with exactly len(_LOG_CSV_COLUMNS) + 2 values
    are written; the two trailing values (save directory and separator line, according to
    the original comment) are dropped. Lines before the first generation are ignored.

    :param log_file: path of the log file to read
    :param csv_file: path of the CSV file to write (overwritten if it exists)
    """
    with open(log_file) as f:
        content = f.readlines()

    groups = []
    current = None  # values of the generation currently being read; None before the first one
    for line in content:
        tokens = line.split()

        if not tokens:
            continue

        if "Generation" in tokens:
            current = [tokens[-1]]
            groups.append(current)
        elif current is not None:
            current.append(tokens[-1])

    column_count = len(_LOG_CSV_COLUMNS)
    expected_length = column_count + 2

    # newline='' is what the csv module expects; the with-block guarantees the rows are flushed.
    with open(csv_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(_LOG_CSV_COLUMNS)

        for generation in groups:
            if len(generation) != expected_length:
                continue

            # Throw out save_directory and distinction line
            writer.writerow(generation[:column_count])

def evaluate_to_csv(save_directory, model_file_paths, config_file, csv_eval_file_path, eval_count=5):
    """
    Evaluate every model snapshot with several rollouts and write the results to a CSV file.

    Each snapshot is run `eval_count` times through run_model() in worker processes. One
    row is written per snapshot: generation number, number of rollouts, mean and standard
    deviation of the episode rewards, mean episode length, followed by the reward and
    length of every single rollout.

    :param save_directory: directory containing the model files
    :param model_file_paths: iterable of model file names inside save_directory
    :param config_file: parsed run configuration; config_file['config']['env_id'] is used
    :param csv_eval_file_path: path of the CSV file to write (overwritten if it exists)
    :param eval_count: number of rollouts per snapshot, at least 1
    :raises ValueError: if eval_count is smaller than 1
    """
    if eval_count < 1:
        raise ValueError('eval_count must be at least 1')

    head_row = ['Generation',
                'Eval_per_Gen',
                'Eval_Rew_Mean',
                'Eval_Rew_Std',
                'Eval_Len_Mean']

    for i in range(eval_count):
        head_row.append('Rew_' + str(i))
        head_row.append('Len_' + str(i))

    # More workers than rollouts would only sit idle.
    worker_count = max(1, min(eval_count, os.cpu_count() or 1))

    # The with-block guarantees that all rows are flushed and the file is closed.
    with open(csv_eval_file_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(head_row)

        for model_file_path in model_file_paths:
            task_args = (config_file['config']['env_id'], save_directory, model_file_path)

            # A new pool per snapshot, as in the original implementation.
            with Pool(processes=worker_count) as pool:
                pending = [pool.apply_async(func=run_model, args=task_args)
                           for _ in range(eval_count)]
                # Collect inside the with-block: leaving it terminates the workers.
                results = np.array([task.get() for task in pending])

            rewards = results[:, 0]
            lengths = results[:, 1]

            row = [parse_generation_number(model_file_path),
                   eval_count,
                   np.mean(rewards),
                   np.std(rewards),
                   np.mean(lengths)]

            for reward, length in zip(rewards, lengths):
                row.append(reward)
                row.append(length)

            writer.writerow(row)

In [None]:
# Number of evaluation rollouts per model snapshot.
eval_count = 5

# Directory of the training run that should be evaluated.
save_directory = '/tmp/es_25675/'

# Make sure the directory path ends with a slash.
save_directory = save_directory if save_directory.endswith('/') else save_directory + '/'

# Collect the snapshot file names, the log file path and the parsed configuration.
model_file_paths, log_file_path, config_file = index_save_directory(save_directory)

In [None]:
# `not model_file_paths` covers both a missing directory (None) and a directory without any
# snapshot (empty list); in either case there is nothing to evaluate.
if not model_file_paths or config_file is None:
    print("Not enough data to evaluate the training. Please provide a directory with enough data.")
else:
    if log_file_path is not None:
        # Save parsed log to a csv spreadsheet
        csv_log_file_path = save_directory + 'log.csv'
        parse_log_to_csv(log_file_path, csv_log_file_path)

    # Evaluate every snapshot eval_count times and store the results next to the models.
    csv_eval_file_path = save_directory + 'evaluation.csv'
    evaluate_to_csv(save_directory, model_file_paths, config_file, csv_eval_file_path, eval_count)

In [None]:
# Defaults so that this cell and the following ones also run when nothing can be recorded.
video_file = None
video = None

if model_file_paths and config_file is not None:
    # Record the last entry of the sorted snapshot list.
    model_file_path = model_file_paths[-1]
    video_file = visualize_model(save_directory, model_file_path, config_file)

    if video_file is not None:
        video = Video.from_file(video_file)

In [None]:
# Show the video widget created in the previous cell as this cell's output.
video

In [None]:
# Per-snapshot evaluation results written by evaluate_to_csv.
eval_data = pd.read_csv(save_directory + 'evaluation.csv')

# log.csv is only written when a log file was found in the save directory, so it may be
# missing; log_data is None in that case.
csv_log_file_path = save_directory + 'log.csv'
log_data = pd.read_csv(csv_log_file_path) if os.path.isfile(csv_log_file_path) else None

In [None]:
%matplotlib inline

# Plot the mean evaluation reward of every snapshot against its generation number.
fig, ax = plt.subplots()
ax.plot(eval_data.Generation, eval_data.Eval_Rew_Mean)
ax.set_xlabel('Generation')
ax.set_ylabel('Cumulative reward')
ax.set_title('Mean evaluation reward per generation')
plt.show()