evolution-strategies

Copyright (c) 2019 Patrick Deubel

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

evolution-strategies includes:

evolution-strategies-starter
Copyright (c) 2016 OpenAI (http://openai.com)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

# Evaluation

This notebook can be used to evaluate the data that was generated during training. Pass the main folder of one or more experiments to the `get_experiments()` function, then call `evaluate()` on the returned experiments to evaluate them.

Still under construction.

In [None]:
import csv
import json
import os
import re
import time

import gym

import pybullet, pybullet_envs

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from gym import wrappers
from ipywidgets import Video
import ipywidgets as widgets
from multiprocessing import Pool, Process
from IPython.display import display

In [None]:
#%%javascript
#IPython.OutputArea.auto_scroll_threshold = 9999;

## Load a Model

Since we use a custom initializer, and it gets serialized when the model is saved, we need to pass it on when we load the model again. Unfortunately, because of the issue with the background TensorFlow session when combining a TensorFlow import with multiprocessing, we cannot define the initializer once and reuse it here. So we define it twice: once inside the `create_model()` function and once here.

In [None]:
# %load load-model.py
def load_model(model_path):
    """Load a saved Keras model that uses the custom initializer and layers.

    TensorFlow is imported inside the function and the custom classes are
    defined locally, so nothing TensorFlow-related lives at module level.

    Args:
        model_path: Path handed to ``tf.keras.models.load_model``.

    Returns:
        The loaded model, or ``None`` when loading raised an ``IOError``
        (the error is printed in that case).
    """
    import tensorflow as tf

    class Normc_initializer(tf.keras.initializers.Initializer):
        """Gaussian initializer whose output is rescaled along axis 0 to norm ``std``."""

        def __init__(self, std=1.0):
            self.std = std

        def __call__(self, shape, dtype=None, partition_info=None):
            weights = np.random.randn(*shape).astype(np.float32)
            norms = np.sqrt(np.square(weights).sum(axis=0, keepdims=True))
            weights *= self.std / norms
            return tf.constant(weights)

    class ObservationNormalizationLayer(tf.keras.layers.Layer):
        """Standardizes observations with a fixed mean/std and clips to [-5, 5]."""

        def __init__(self, ob_mean, ob_std, **kwargs):
            self.ob_mean = ob_mean
            self.ob_std = ob_std
            super().__init__(**kwargs)

        def call(self, x):
            standardized = (x - self.ob_mean) / self.ob_std
            return tf.clip_by_value(standardized, -5.0, 5.0)

        # get_config and from_config are required so the layer can be (de)serialized
        def get_config(self):
            config = super().get_config()
            config.update(ob_mean=self.ob_mean, ob_std=self.ob_std)
            return config

        @classmethod
        def from_config(cls, config):
            return cls(**config)

    class DiscretizeActionsUniformLayer(tf.keras.layers.Layer):
        """Turns per-bin scores into actions spread uniformly over [alow, ahigh]."""

        def __init__(self, num_ac_bins, adim, ahigh, alow, **kwargs):
            self.num_ac_bins = num_ac_bins
            self.adim = adim
            # The bounds are NumPy arrays when taken from the environment, but plain lists when the
            # model is restored from an h5 file. Lists do not support subtraction, hence the cast.
            self.ahigh = np.array(ahigh)
            self.alow = np.array(alow)
            super().__init__(**kwargs)

        def call(self, x):
            # Shape [n, adim, num_ac_bins]; n is inferred from the input
            bin_scores = tf.reshape(x, [-1, self.adim, self.num_ac_bins])
            # Index of the highest scoring bin per action dimension
            best_bin = tf.argmax(bin_scores, 2)

            # Map the bin index from [0, num_ac_bins - 1] onto [alow, ahigh]
            action_span = (self.ahigh - self.alow)[None, :]
            bin_width = 1. / (self.num_ac_bins - 1.)
            return bin_width * tf.keras.backend.cast(best_bin, 'float32') * action_span + self.alow[None, :]

        # get_config and from_config are required so the layer can be (de)serialized
        def get_config(self):
            config = super().get_config()
            config.update(num_ac_bins=self.num_ac_bins,
                          adim=self.adim,
                          ahigh=self.ahigh,
                          alow=self.alow)
            return config

        @classmethod
        def from_config(cls, config):
            return cls(**config)

    custom_objects = {
        'Normc_initializer': Normc_initializer,
        'ObservationNormalizationLayer': ObservationNormalizationLayer,
        'DiscretizeActionsUniformLayer': DiscretizeActionsUniformLayer,
    }

    try:
        return tf.keras.models.load_model(model_path, custom_objects=custom_objects)
    except IOError as error:
        print(error)
        return None

In [None]:
def rollout_evaluation(env, model, render=False, timestep_limit=None, random_stream=None):
    """Run a single episode of ``env`` using the actions predicted by ``model``.

    Args:
        env: Gym-style environment providing ``reset()``, ``step(action)``,
            ``render()`` and a ``spec`` attribute.
        model: Object whose ``predict_on_batch`` maps a batch of observations
            to a batch of actions.
        render: If true, ``env.render()`` is called before every step.
        timestep_limit: Optional cap on the number of steps. The smaller of
            this value and the environment's own step limit is used. If
            neither is available the episode runs until ``done`` is returned.
        random_stream: Accepted for interface compatibility only. It is
            currently unused, so no action noise is ever added.

    Returns:
        A tuple ``(rewards, steps)`` with the per-step rewards as a float32
        NumPy array and the number of steps taken.

    Raises:
        AssertionError: Propagated from ``env.step``, e.g. when the action
            contains NaN values. Callers use this to detect broken models.
    """
    spec = getattr(env, 'spec', None)
    tags = getattr(spec, 'tags', None) or {}
    env_timestep_limit = tags.get('wrapper_config.TimeLimit.max_episode_steps')
    if env_timestep_limit is None:
        # NOTE(review): gym versions without ``spec.tags`` are expected to expose the limit as
        # ``spec.max_episode_steps`` - confirm against the installed gym version.
        env_timestep_limit = getattr(spec, 'max_episode_steps', None)

    if timestep_limit is None:
        timestep_limit = env_timestep_limit
    elif env_timestep_limit is not None:
        timestep_limit = min(timestep_limit, env_timestep_limit)

    rews = []
    ob = env.reset()
    # Without any limit the loop only ends once the environment reports ``done``
    while timestep_limit is None or len(rews) < timestep_limit:
        if render:
            env.render()
        ac = model.predict_on_batch(ob[None])[0]
        ob, rew, done, _ = env.step(ac)
        rews.append(rew)

        if done:
            break
    return np.array(rews, dtype=np.float32), len(rews)

# Visualize

In [None]:
def plot(x_value, x_label, y_value, y_label, y_std=None, config=None):
    """Plot ``y_value`` against ``x_value`` and show the figure.

    Args:
        x_value: Values for the x-axis.
        x_label: Label of the x-axis.
        y_value: Values for the y-axis (the curve itself).
        y_label: Label of the y-axis.
        y_std: Optional spread around ``y_value``. When given, the band
            ``y_value - y_std`` to ``y_value + y_std`` is shaded.
        config: Optional JSON-serializable object printed before plotting.
    """
    if config is not None:
        print(json.dumps(config, indent=4))

    plt.plot(x_value, y_value)

    if y_std is not None:
        # Shaded band around the curve representing the standard deviation
        lower_bound = y_value - y_std
        upper_bound = y_value + y_std
        plt.fill_between(x_value, lower_bound, upper_bound, alpha=0.5)

    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()

In [None]:
class TrainingRun():
    """A single training run: its log, configuration, model snapshots and evaluation.

    Attributes set by the constructor:
        save_directory: Folder of the run; model files and videos live here.
        log: Training log as a DataFrame (or ``None``).
        config: Parsed configuration (or ``None``).
        model_file_paths: Full paths of the model files, or ``None`` if there are none.
        no_models / no_evaluation / no_video: Flags telling what is missing.
        evaluation: Evaluation DataFrame (or ``None``).
        data: ``log`` merged with the mean columns of ``evaluation`` (or ``None``).
        video_file: Path of a recorded video (or ``None``).
    """

    def __init__(self, save_directory, log, config, model_file_paths, evaluation=None, video_file=None):
        """Store the run's data and derive the ``no_*`` flags and the merged ``data``.

        ``model_file_paths`` holds file names relative to ``save_directory``.
        """
        self.save_directory = save_directory
        self.log = log
        self.config = config

        if not model_file_paths:
            self.no_models = True
            self.model_file_paths = None
        else:
            self.no_models = False
            self.model_file_paths = [os.path.join(save_directory, model) for model in model_file_paths]

        self.evaluation = evaluation
        self.no_evaluation = evaluation is None
        # merge_log_eval() yields None whenever the log or the evaluation is missing
        self.data = self.merge_log_eval()

        self.video_file = video_file
        self.no_video = video_file is None

        if self.log is None or self.config is None:
            print("This TrainingRun is missing either the log file or the configuration file. It will not "
                 + "work as expected.")

    def merge_log_eval(self):
        """Return the log joined with the evaluation's mean columns on ``Generation``.

        Returns ``None`` if either the log or the evaluation is missing.
        """
        if self.log is not None and self.evaluation is not None:
            return self.log.merge(self.evaluation[['Generation', 'Eval_Rew_Mean', 'Eval_Rew_Std', 'Eval_Len_Mean']],
                           on='Generation')
        return None

    def parse_generation_number(self, model_file_path):
        """Extract the integer between ``snapshot_`` and ``.h5`` or return ``None``."""
        try:
            return int(model_file_path.split('snapshot_')[-1].split('.h5')[0])
        except ValueError:
            return None

    def evaluate(self, force=False, eval_count=5, skip=None, save=False, delete_models=False):
        """Evaluate the model snapshots of this run and merge the result with the log.

        Args:
            force: Re-evaluate even if merged data already exists.
            eval_count: Number of rollouts per evaluated model.
            skip: Step of the slice over the model files (``None`` uses every model).
            save: Write the evaluation to ``evaluation.csv`` in the run folder.
            delete_models: Delete the model files after a successful evaluation.

        Returns:
            The merged log/evaluation DataFrame, or ``None`` if the run has no
            models or a model produced an invalid rollout.
        """
        if not force and self.data is not None:
            return self.data

        if self.no_models:
            print("No models given for that training run, so no new evaluation is possible. You can still plot" +
                  " your data if you have an evaluation.csv or log.csv.")
            return None

        head_row = ['Generation', 'Eval_per_Gen', 'Eval_Rew_Mean', 'Eval_Rew_Std', 'Eval_Len_Mean']
        for i in range(eval_count):
            head_row.extend(['Rew_' + str(i), 'Len_' + str(i)])

        data = []

        # The context manager tears the pool down on every exit path, including the early return
        # below and exceptions raised by a worker.
        with Pool(os.cpu_count()) as pool:
            pending = []
            for model_file_path in self.model_file_paths[::skip]:
                gen = self.parse_generation_number(model_file_path)
                handles = [pool.apply_async(func=self.run_model, args=(model_file_path,))
                           for _ in range(eval_count)]
                pending.append((handles, gen))

            for handles, gen in pending:
                results = []
                for handle in handles:
                    result = handle.get()
                    if result == [None, None]:
                        print("The provided model file produces non finite numbers. Stopping.")
                        return None
                    results.append(result)

                results = np.array(results)
                rewards = results[:, 0]
                lengths = results[:, 1]

                row = [gen,
                       eval_count,
                       np.mean(rewards),
                       np.std(rewards),
                       np.mean(lengths)]
                for reward, length in zip(rewards, lengths):
                    row.extend([reward, length])

                data.append(row)

        self.evaluation = pd.DataFrame(data, columns=head_row)
        self.no_evaluation = False
        if save:
            self.save_evaluation()
        # Only copy the mean values in the merged data
        self.data = self.merge_log_eval()

        if delete_models:
            self.delete_model_files()

        return self.data

    def delete_model_files(self, save_last=False):
        """Delete the model files of this run from disk.

        Args:
            save_last: Keep the last model file and delete only the others.

        Afterwards ``model_file_paths`` lists only the files that still exist
        (``None`` if none are left) and ``no_models`` is updated accordingly.
        """
        if not self.model_file_paths:
            return

        if save_last:
            to_delete = self.model_file_paths[:-1]
            to_keep = self.model_file_paths[-1:]
        else:
            to_delete = self.model_file_paths
            to_keep = []

        for model_file_path in to_delete:
            os.remove(model_file_path)

        self.model_file_paths = to_keep or None
        self.no_models = not to_keep

    def plot_reward_timestep(self):
        """Plot the mean evaluation reward over timesteps, falling back to the training log."""
        if self.data is not None:
            plot(self.data.TimestepsSoFar, 'Timesteps', self.data.Eval_Rew_Mean, 'Cummulative reward')
        else:
            print("You did not evaluate these results. The evaluated mean reward displayed was computed during training"
                  + " and can have missing values!")
            plot(self.log.TimestepsSoFar, 'Timesteps', self.log.EvalGenRewardMean, 'Cummulative reward')

    def save_evaluation(self):
        """Write the evaluation to ``evaluation.csv`` in the run folder, if there is one."""
        if self.evaluation is not None:
            self.evaluation.to_csv(os.path.join(self.save_directory, 'evaluation.csv'))

    def visualize(self, force=False):
        """Return the path of a video of this run, recording one if necessary.

        Args:
            force: Record a new video even if one is already known.

        Returns:
            Path of an ``.mp4`` file in the run folder, or ``None`` if the run
            has no models or no video file was found after recording.
        """
        if self.no_models:
            # Error message in Experiment
            return None
        if not force and self.video_file is not None:
            return self.video_file

        latest_model = self.model_file_paths[-1]

        # The rollout runs in a separate process; one worker is enough for a single task
        with Pool(1) as pool:
            pool.apply(func=self.run_model, args=(latest_model, True))

        for file in os.listdir(self.save_directory):
            if file.endswith('.mp4'):
                self.video_file = os.path.join(self.save_directory, file)
        self.no_video = self.video_file is None

        return self.video_file

    def run_model(self, model_file_path, record=False):
        """Run one rollout of the given model in the configured environment.

        Args:
            model_file_path: Path of the model file to load.
            record: Wrap the environment in ``wrappers.Monitor`` writing to the run folder.

        Returns:
            ``[sum of rewards, episode length]``, or ``[None, None]`` if the
            rollout raised an ``AssertionError``.
        """
        env = gym.make(self.config['config']['env_id'])
        try:
            env.reset()

            if record:
                env = wrappers.Monitor(env, self.save_directory, force=True)

            model = load_model(model_file_path)

            try:
                rewards, length = rollout_evaluation(env, model)
            except AssertionError:
                # Is thrown when for example ac is a list which has at least one entry with NaN
                return [None, None]

            return [rewards.sum(), length]
        finally:
            # Release the environment; presumably this also lets the Monitor wrapper finish its
            # recording - confirm against the installed gym version.
            env.close()

class Experiment():
    """A group of training runs that share one configuration.

    ``mean_data`` and ``std_data`` hold the per-generation mean and standard
    deviation across the runs once ``evaluate()`` has succeeded; both stay
    ``None`` until then.
    """

    def __init__(self, config, training_runs):
        """Store the runs and aggregate immediately if every run already has an evaluation."""
        self.config = config
        self.training_runs = training_runs
        self.num_training_runs = len(self.training_runs)
        self.mean_data = None
        self.std_data = None
        self.video_file = None

        self.runs_evaluated = all(not run.no_evaluation for run in self.training_runs)

        # Every run has already an evaluation, therefore initialize self.mean_data and self.std_data with it
        if self.runs_evaluated:
            self.evaluate()

    def evaluate(self, force=False, eval_count=5, skip=None, save=False, delete_models=False):
        """Evaluate every training run and aggregate the results per generation.

        The arguments are forwarded unchanged to ``evaluate()`` of each run.
        On success ``mean_data`` and ``std_data`` are (re)computed; if a run
        cannot be evaluated they are left untouched.
        """
        if not self.runs_evaluated and any(run.no_models for run in self.training_runs):
            print("The training runs do not provide model files, therefore the experiment cannot be evaluated." +
                  " Please provide at least one .h5 file.")
            return

        data = []
        for training_run in self.training_runs:
            d = training_run.evaluate(force, eval_count, skip, save, delete_models)
            if d is None:
                return
            data.append(d)

        if not data:
            # No training runs at all, nothing to aggregate
            return

        # Group on the Generation column so that rows of different runs are matched by generation,
        # then turn it back into a regular column because the plot methods read it as one.
        grouped = pd.concat(data).groupby('Generation')
        self.mean_data = grouped.mean().reset_index()
        self.std_data = grouped.std().reset_index()
        self.runs_evaluated = True

    def visualize(self, force=False):
        """Return the first video any training run can provide, or ``None``."""
        video_file = None
        for run in self.training_runs:
            video_file = run.visualize(force=force)
            if video_file is not None:
                break
        self.video_file = video_file

        if self.video_file is None:
            print("The training runs do not provide model files, therefore the experiment cannot be visualized." +
                  " Please provide at least one .h5 file so a video can be recorded.")
        return self.video_file

    def delete_model_files(self, save_last=False):
        """Delete the model files of every training run."""
        for run in self.training_runs:
            run.delete_model_files(save_last)

    def get_num_training_runs(self):
        """Return the number of training runs in this experiment."""
        return self.num_training_runs

    def get_all_training_runs(self):
        """Return a new list containing all training runs."""
        return list(self.training_runs)

    def get_all_logs(self):
        """Return the log of every training run."""
        return [run.log for run in self.training_runs]

    def get_all_evaluations(self):
        """Return the evaluation of every training run."""
        return [run.evaluation for run in self.training_runs]

    def print_config(self):
        """Print the configuration as indented JSON."""
        print(json.dumps(self.config, indent=4))

    def _plot_mean(self, x_column, x_label, y_column, y_label, description):
        """Plot one aggregated column over another and print ``description``.

        ``description`` may contain one ``{}`` placeholder which is filled
        with the number of training runs.
        """
        y_std = None
        # With a single training run the standard deviation is NaN everywhere, so the shaded
        # region is only drawn when there are several runs.
        if self.num_training_runs > 1:
            y_std = self.std_data[y_column]
        plot(self.mean_data[x_column], x_label, self.mean_data[y_column], y_label, y_std)
        print(description.format(self.num_training_runs))

    def plot_reward_timestep(self):
        """Plot the mean evaluation reward over timesteps (per run if not evaluated)."""
        if self.mean_data is None:
            print("You did not evaluate the results. Please run evaluate() on this experiment. The plotted results"
                 + " are used from the log file.")
            for run in self.training_runs:
                run.plot_reward_timestep()
        else:
            self._plot_mean(
                'TimestepsSoFar', 'Timesteps', 'Eval_Rew_Mean', 'Cummulative reward',
                "Displayed is the mean reward of {} different runs over timesteps with different random seeds." +
                " If there was more than one run, the shaded region is the standard deviation of the mean reward.")

    def plot_reward_generation(self):
        """Plot the mean evaluation reward over generations."""
        if self.mean_data is None:
            print("You did not evaluate the results. Please run evaluate() on this experiment.")
        else:
            self._plot_mean(
                'Generation', 'Generation', 'Eval_Rew_Mean', 'Cummulative reward',
                "Displayed is the mean reward of {} different runs over generations with different random seeds." +
                " If there was more than one run, the shaded region is the standard deviation of the mean reward.")

    def plot_timesteps_timeelapsed(self):
        """Plot the mean number of timesteps over the elapsed time."""
        if self.mean_data is None:
            print("You did not evaluate the results. Please run evaluate() on this experiment.")
        else:
            self._plot_mean(
                'TimeElapsed', 'Time elapsed (s)', 'TimestepsSoFar', 'Timesteps',
                "Displayed is the mean number of timesteps of {} different runs over the elapsed time." +
                " If there was more than one run, the shaded region is the standard deviation of the timesteps.")

In [None]:
def _natural_sort_key(file_name):
    """Split a file name into text and integer chunks so embedded numbers sort numerically."""
    # re.split with a capturing group alternates text / digits / text ..., so odd positions are
    # always the digit runs.
    return [int(chunk) if position % 2 else chunk
            for position, chunk in enumerate(re.split(r'(\d+)', file_name))]


def get_experiments(main_directory):
    """Collect all training runs below ``main_directory`` and group them into experiments.

    A folder counts as a training run if it contains both ``log.csv`` and
    ``config.json``. Runs whose log or configuration cannot be read are
    skipped. Runs with equal configurations end up in the same experiment.

    Args:
        main_directory: Root folder that is walked recursively.

    Returns:
        A list of ``Experiment`` objects, one per distinct configuration.
    """
    index = {}
    for root, _dirs, files in os.walk(main_directory):
        if 'log.csv' in files and 'config.json' in files:
            index[root] = files

    training_runs = []
    for sub_dir, files in index.items():
        models, log, evaluation, config, video_file = [], None, None, None, None
        for file in files:
            if file.endswith('.h5'):
                models.append(file)
            elif file.endswith('log.csv'):
                try:
                    log = pd.read_csv(os.path.join(sub_dir, file))
                except pd.errors.EmptyDataError:
                    print("The log file {} is empty. Skipping this folder({}).".format(
                    file, sub_dir))
            elif file.endswith('evaluation.csv'):
                try:
                    evaluation = pd.read_csv(os.path.join(sub_dir, file))
                except pd.errors.EmptyDataError:
                    print("The evaluation file {} is empty. Continuing.".format(file))
            elif file.endswith('config.json'):
                with open(os.path.join(sub_dir, file), encoding='utf-8') as f:
                    try:
                        config = json.load(f)
                    except json.JSONDecodeError:
                        print("The config file {} is empty or cannot be parsed. Skipping this folder ({}).".format(
                        file, sub_dir))
            elif file.endswith('.mp4'):
                video_file = os.path.join(sub_dir, file)

        # Numeric-aware ordering keeps e.g. snapshot_10 after snapshot_2, so the last entry really
        # is the newest model even when the generation numbers are not zero-padded.
        models.sort(key=_natural_sort_key)
        if log is not None and config is not None:
            training_runs.append(TrainingRun(sub_dir, log, config, models, evaluation, video_file))

    # Configurations are dicts and therefore not hashable, so group by equality comparison
    configs_and_runs = []
    for run in training_runs:
        for known_config, runs in configs_and_runs:
            if known_config == run.config:
                runs.append(run)
                break
        else:
            configs_and_runs.append((run.config, [run]))

    return [Experiment(config, runs) for (config, runs) in configs_and_runs]

In [None]:
# Lift the pandas display limits so DataFrames are shown with all columns and rows
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, None)

In [None]:
# Root folder handed to get_experiments(), which walks it for training runs; adjust to your setup
main_directory = '/home/jovyan/work/evolution-strategies/training_runs/'

In [None]:
# Collect all training runs below main_directory, grouped into experiments by configuration
experiments = get_experiments(main_directory)

In [None]:
# Print each experiment's configuration and plot its reward over timesteps
for e in experiments:
    e.print_config()
    e.plot_reward_timestep()

In [None]:
# Print the last logged timestep count and elapsed time of every run, then plot timesteps
# over elapsed time for the experiment
for e in experiments:
    for log in e.get_all_logs():
        print("Timesteps overall", log.TimestepsSoFar.iloc[-1])
        print("Time elapsed", log.TimeElapsed.iloc[-1])
    e.plot_timesteps_timeelapsed()

In [None]:
# Evaluate every experiment; force=False reuses existing evaluation data, save=True asks each
# run to write its evaluation to disk
for e in experiments:
    e.evaluate(force=False, save=True)

In [None]:
# Plot the reward over timesteps again for every experiment
for e in experiments:
    e.plot_reward_timestep()

In [None]:
# Show the configuration of each experiment and the full training log of each of its runs
for e in experiments:
    e.print_config()
    for run in e.get_all_training_runs():
        print("Max timesteps", run.log.TimestepsSoFar.iloc[-1])
        display(run.log)

In [None]:
# Pair every experiment with the result of visualize(); force=False reuses an existing video.
# The second entry is None when visualize() could not provide a video.
video_files = []
for e in experiments:
    video_files.append((e, e.visualize(force=False)))

In [None]:
# Show the configuration and the recorded video of every experiment
for (e, v) in video_files:
    e.print_config()
    if v is None:
        # visualize() returned no video for this experiment, so there is no file to load
        print("No video available for this experiment.")
    else:
        video = Video.from_file(v)
        display(video)
    print("----------------------------")

In [None]:
# Dropdown with one entry per experiment, labelled with its environment id and population size
w = widgets.Dropdown(
    options=["{} {}".format(e.config['config']['env_id'], e.config['config']['population_size']) for e in experiments],
    description='Experiment:',
    disabled=False,
)

def on_change_dropdown_exp(change):
    """Print the newly selected dropdown entry whenever the widget's value changes."""
    # The handler is registered for all changes, so filter for changes of the 'value' trait
    if change['type'] == 'change' and change['name'] == 'value':
        e = change['new']
        print(e)

w.observe(on_change_dropdown_exp)

display(w)