In [1]:
import csv
import json
import os
import re
import time

import gym
import roboschool

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from gym import wrappers
from ipywidgets import Video
from multiprocessing import Pool, Process
from IPython.display import display

## Load a Model

The model uses a custom initializer and custom layers, and these are serialized when the model is saved, so we have to pass them to Keras again when we load it. Unfortunately, importing TensorFlow starts a background session that does not work together with multiprocessing, so we cannot define the initializer once and reuse it here. Instead it is defined twice: once inside the `create_model()` function and once here.

In [2]:
def load_model(model_path):   
    """
    Load a saved Keras model that depends on the custom initializer and layers defined below.

    TensorFlow is imported inside the function and the custom classes are (re)defined locally, then handed to
    Keras through ``custom_objects`` so the serialized class names can be resolved.

    :param model_path: Path of the saved model; passed unchanged to ``tf.keras.models.load_model``.
    :return: The object returned by ``tf.keras.models.load_model``.
    """
    import tensorflow as tf

    class Normc_initializer(tf.keras.initializers.Initializer):
        """
        Initializer producing a constant tensor of Gaussian random numbers that are rescaled so that the
        L2 norm taken along axis 0 equals ``std``.

        :param std: Target norm along axis 0 of the generated values.
        """
        def __init__(self, std=1.0):
            self.std = std

        def __call__(self, shape, dtype=None, partition_info=None):
            # `dtype` and `partition_info` are accepted but not used; the result is always float32.
            out = np.random.randn(*shape).astype(np.float32)
            # Divide by the norm along axis 0 and scale by std.
            out *= self.std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
            return tf.constant(out)
        
    class ObservationNormalizationLayer(tf.keras.layers.Layer):
        """
        Layer that standardizes its input with a fixed mean and standard deviation and clips the result.

        :param ob_mean: Value subtracted from the input.
        :param ob_std: Value the centered input is divided by.
        """
        def __init__(self, ob_mean, ob_std, **kwargs):
            self.ob_mean = ob_mean
            self.ob_std = ob_std
            super(ObservationNormalizationLayer, self).__init__(**kwargs)

        def call(self, input):
            # Standardize, then clip to [-5, 5].
            return tf.clip_by_value((input - self.ob_mean) / self.ob_std, -5.0, 5.0)
          
        def get_config(self):
            # Extend the base layer config with the constructor arguments so from_config() can rebuild the layer.
            base_config = super(ObservationNormalizationLayer, self).get_config()
            base_config['ob_mean'] = self.ob_mean
            base_config['ob_std'] = self.ob_std
            return base_config
        
        @classmethod
        def from_config(cls, config):
            # Every config entry is forwarded as a keyword argument to __init__.
            return cls(**config)
        
    class DiscretizeActionsUniformLayer(tf.keras.layers.Layer):
        """
        Layer that turns per-bin scores into actions by picking the highest-scoring bin for every action
        dimension and mapping the bin index linearly onto the range starting at ``alow`` and ending at ``ahigh``.

        :param num_ac_bins: Number of bins per action dimension.
        :param adim: Number of action dimensions.
        :param ahigh: Upper action bound(s); indexed with ``[None, :]`` in ``call``.
        :param alow: Lower action bound(s); indexed with ``[None, :]`` in ``call``.
        """
        def __init__(self, num_ac_bins, adim, ahigh, alow, **kwargs):
            self.num_ac_bins = num_ac_bins
            self.adim = adim
            self.ahigh = ahigh
            self.alow = alow
            super(DiscretizeActionsUniformLayer, self).__init__(**kwargs)

        def call(self, x):            
            # Reshape to [n x i x j] where n is dynamically chosen, i equals action dimension and j equals the number
            # of bins
            scores_nab = tf.reshape(x, [-1, self.adim, self.num_ac_bins])
            # This picks the bin with the greatest value
            a = tf.argmax(scores_nab, 2)
            
            # Then map the bin index from [0, num_ac_bins - 1] linearly onto [alow, ahigh]
            # (the original author notes that alow and ahigh equal -1 and 1).
            ac_range_1a = (self.ahigh - self.alow)[None, :]
            return 1. / (self.num_ac_bins - 1.) * tf.keras.backend.cast(a, 'float32') * ac_range_1a + self.alow[None, :]        
        
        # get_config and from_config need to be implemented to be able to serialize the model
        def get_config(self):
            base_config = super(DiscretizeActionsUniformLayer, self).get_config()
            base_config['num_ac_bins'] = self.num_ac_bins
            base_config['adim'] = self.adim
            base_config['ahigh'] = self.ahigh
            base_config['alow'] = self.alow
            return base_config
        
        @classmethod
        def from_config(cls, config):
            # Every config entry is forwarded as a keyword argument to __init__.
            return cls(**config)
    
    # Maps the class names to the classes defined above so Keras can resolve them while loading.
    custom_objects = {'Normc_initializer' : Normc_initializer, 
                      'ObservationNormalizationLayer' : ObservationNormalizationLayer,
                      'DiscretizeActionsUniformLayer' : DiscretizeActionsUniformLayer}
    
    return tf.keras.models.load_model(model_path, custom_objects=custom_objects)

def rollout(env, model, render=False, timestep_limit=None, random_stream=None):
    """
    Run one episode in ``env`` with actions chosen by ``model`` and collect the rewards.

    The episode ends when the environment reports ``done`` or when the step limit is reached. The step limit
    is the smaller of ``timestep_limit`` and the limit registered for the environment; a limit that is not
    available is ignored. If neither limit is available the episode runs until the environment reports ``done``.

    :param env: Gym-style environment providing ``reset()``, ``step()``, ``render()`` and ``spec``.
    :param model: Model handed to ``act()`` to compute the action for each observation.
    :param render: If True, ``env.render()`` is called before every step.
    :param timestep_limit: Optional upper bound on the number of steps.
    :param random_stream: Forwarded to ``act()``, which decides whether action noise is added.
    :return: Tuple ``(rewards, steps)`` with the per-step rewards as a float32 array and the number of steps.
    """
    # Look up the registered episode limit defensively: the tag (or the whole `tags` dict) may be missing.
    spec = getattr(env, 'spec', None)
    tags = getattr(spec, 'tags', None) or {}
    env_timestep_limit = tags.get('wrapper_config.TimeLimit.max_episode_steps')
    if env_timestep_limit is None:
        env_timestep_limit = getattr(spec, 'max_episode_steps', None)

    # Only limits that are actually known take part in the minimum.
    known_limits = [limit for limit in (timestep_limit, env_timestep_limit) if limit is not None]
    step_cap = min(known_limits) if known_limits else None

    rews = []
    ob = env.reset()
    while step_cap is None or len(rews) < step_cap:
        if render:
            env.render()
        # The model works on batches: add a batch axis to the observation and take the single resulting action.
        ac = act(ob[None], model, random_stream=random_stream)[0]
        ob, rew, done, _ = env.step(ac)
        rews.append(rew)

        if done:
            break
    return np.array(rews, dtype=np.float32), len(rews)

def act(ob, model, random_stream=None):
    """
    Compute the actions for a batch of observations.

    :param ob: Batch of observations handed to ``model.predict_on_batch``.
    :param model: Object providing ``predict_on_batch``.
    :param random_stream: Currently unused; the action-noise code that consumed it is disabled.
    :return: The result of ``model.predict_on_batch(ob)``.
    """
    # Disabled action noise, kept for reference:
    #   if random_stream is not None and model_structure.ac_noise_std != 0:
    #       action += random_stream.randn(*action.shape) * model_structure.ac_noise_std
    return model.predict_on_batch(ob)

# Visualize

In [24]:
def run_model(env_id, save_directory, model_file_path, record=False):
    """
    Create an environment, load a model snapshot and run one episode with it.

    :param env_id: Id passed to ``gym.make``.
    :param save_directory: Directory of the training run; when recording, videos go to its ``videos/`` folder.
    :param model_file_path: Path of the model file handed to ``load_model``.
    :param record: If True the environment is wrapped in a ``Monitor`` that writes into the video directory.
    :return: The video directory if ``record`` is True, otherwise ``[summed reward, episode length]``.
    """
    env = gym.make(env_id)
    video_directory = None
    try:
        # Kept from the original flow; rollout() resets the environment again before stepping.
        env.reset()

        if record:
            video_directory = os.path.join(save_directory, 'videos/')
            env = wrappers.Monitor(env, video_directory, force=True)

        model = load_model(model_file_path)

        rewards, length = rollout(env, model)
    finally:
        # Always release the environment. When recording, `env` is the Monitor wrapper at this point, so
        # closing it also lets the monitor finish its output before the directory is returned.
        env.close()

    if record:
        return video_directory

    return [rewards.sum(), length]

In [28]:
class TrainingRun():
    """
    One training run stored in a directory: training log, configuration, model snapshots and evaluation.

    Attributes set in ``__init__``:
        save_directory   -- directory of the run; snapshot paths and output files are built relative to it
        log              -- training log (expected to be a pandas DataFrame) or None
        config           -- configuration; ``config['config']['env_id']`` is read when running a model
        model_file_paths -- snapshot file names joined with ``save_directory``
        evaluation       -- evaluation DataFrame or None
        data             -- ``log`` merged with the evaluation summary columns, or None
        video            -- ``Video`` widget of the last recorded rollout, or None
        video_file       -- file name of that video, or None
    """

    def __init__(self, save_directory, log, config, model_file_paths, evaluation=None):
        self.save_directory = save_directory
        self.log = log
        self.config = config
        self.model_file_paths = [os.path.join(save_directory, model) for model in model_file_paths]
        self.evaluation = evaluation
        # merge_log_eval() returns None whenever the log or the evaluation is missing.
        self.data = self.merge_log_eval()
        self.video = None
        self.video_file = None

    def merge_log_eval(self):
        """
        Merge the training log with the summary columns of the evaluation on ``Generation``.

        :return: The merged DataFrame, or None if the log or the evaluation is missing.
        """
        if self.log is None or self.evaluation is None:
            return None
        summary = self.evaluation[['Generation', 'Eval_Rew_Mean', 'Eval_Rew_Std', 'Eval_Len_Mean']]
        return self.log.merge(summary, on='Generation')

    def parse_generation_number(self, model_file_path):
        """
        Extract the generation number from a snapshot path of the form ``...snapshot_<number>.h5``.

        :return: The number as int, or None if the text between ``snapshot_`` and ``.h5`` is not an integer.
        """
        try:
            return int(model_file_path.split('snapshot_')[-1].split('.h5')[0])
        except ValueError:
            return None

    def evaluate(self, force=False, eval_count=5, skip=None, save=False):
        """
        Evaluate the model snapshots by running ``eval_count`` episodes per snapshot in worker processes.

        :param force: If False and merged data already exists, that data is returned without re-evaluating.
        :param eval_count: Number of episodes per snapshot.
        :param skip: Step used when slicing the snapshot list (``None`` evaluates every snapshot).
        :param save: If True the evaluation is written to ``evaluation.csv`` in the run directory.
        :return: The training log merged with the evaluation summary (None if there is no log).
        """
        if not force and self.data is not None:
            return self.data

        head_row = ['Generation', 'Eval_per_Gen', 'Eval_Rew_Mean', 'Eval_Rew_Std', 'Eval_Len_Mean']
        for i in range(eval_count):
            head_row.extend(['Rew_' + str(i), 'Len_' + str(i)])

        data = []

        for model_file_path in self.model_file_paths[::skip]:
            run_args = (self.config['config']['env_id'], self.save_directory, model_file_path)
            # A fresh pool per snapshot: every episode runs run_model() in a worker process.
            with Pool(os.cpu_count()) as pool:
                pending = [pool.apply_async(func=run_model, args=run_args) for _ in range(eval_count)]
                results = [result.get() for result in pending]

            # One row per episode: [summed reward, length]. The reshape keeps the array two-dimensional even
            # when no episode was run.
            results = np.array(results).reshape(-1, 2)
            rewards = results[:, 0]
            lengths = results[:, 1]

            row = [self.parse_generation_number(model_file_path),
                   eval_count,
                   np.mean(rewards),
                   np.std(rewards),
                   np.mean(lengths)]

            for reward, length in zip(rewards, lengths):
                row.append(reward)
                row.append(length)

            data.append(row)

        self.evaluation = pd.DataFrame(data, columns=head_row)
        if save:
            self.save_evaluation()
        # Only copy the mean values in the merged data
        self.data = self.merge_log_eval()
        return self.data

    def plot_reward_timestep(self):
        """Plot the evaluated mean reward over timesteps, falling back to the values logged during training."""
        if self.data is not None:
            self.plot(self.data.TimestepsSoFar, 'Timesteps', self.data.Eval_Rew_Mean, 'Cummulative reward')
        else:
            # The space after "training" was missing, which glued the two halves of the message together.
            print("You did not evaluate these results. The evaluated mean reward displayed was computed during training "
                  + "and can have missing values!")
            self.plot(self.log.TimestepsSoFar, 'Timesteps', self.log.EvalGenRewardMean, 'Cummulative reward')

    def plot(self, x_value, x_label, y_value, y_label):
        """Draw a single labelled line plot and show it."""
        plt.plot(x_value, y_value)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.show()

    def save_evaluation(self):
        """Write the evaluation to ``evaluation.csv`` in the run directory (no-op without an evaluation)."""
        if self.evaluation is not None:
            # index=False: the row index carries no information and would come back as an extra
            # "Unnamed: 0" column when the file is read again.
            self.evaluation.to_csv(os.path.join(self.save_directory, 'evaluation.csv'), index=False)

    def visualize(self, force=False):
        """
        Record a rollout of the last indexed snapshot and return it as a ``Video`` widget.

        :param force: If False and a video was already recorded, the cached widget is returned.
        :return: The ``Video`` widget, or None if there is no snapshot or no ``.mp4`` file was produced.
        """
        if not force and self.video is not None and self.video_file is not None:
            return self.video

        if len(self.model_file_paths) < 1:
            print("There are no model files indexed. You must provide at leas one .h5 file!")
            return

        latest_model = self.model_file_paths[-1]

        # A single task needs a single worker process.
        with Pool(1) as pool:
            video_directory = pool.apply(func=run_model, args=(self.config['config']['env_id'],
                                                               self.save_directory,
                                                               latest_model,
                                                               True))

        # Sorted so that the chosen file does not depend on the directory listing order; as before, the last
        # matching file wins.
        for file in sorted(os.listdir(video_directory)):
            if file.endswith('.mp4'):
                self.video_file = file
                self.video = Video.from_file(os.path.join(video_directory, file))

        return self.video

class Experiment():
    """
    A collection of training runs found below one main directory, with evaluation data aggregated across runs.

    Attributes set in ``__init__``:
        main_directory    -- directory that was scanned
        training_runs     -- list of ``TrainingRun`` objects, one per indexed sub directory
        num_training_runs -- number of training runs
        mean_data         -- per-generation mean over all runs (None until ``evaluate()`` was called)
        std_data          -- per-generation standard deviation over all runs (None until ``evaluate()``)
    """

    def __init__(self, main_directory):
        self.main_directory = main_directory
        index = self.index_main_directory(main_directory)
        self.training_runs = self.load_data(index)
        self.num_training_runs = len(self.training_runs)
        self.mean_data = None
        self.std_data = None

    def evaluate(self, force=False, eval_count=5, skip=None, save=False):
        """
        Evaluate every training run and aggregate the numeric columns per generation.

        The arguments are forwarded unchanged to ``TrainingRun.evaluate``. Afterwards ``mean_data`` and
        ``std_data`` hold one row per generation (with ``Generation`` as a regular column); they are reset to
        None if no run produced any data.
        """
        per_run = [training_run.evaluate(force, eval_count, skip, save) for training_run in self.training_runs]
        per_run = [frame for frame in per_run if frame is not None]

        if not per_run:
            # pd.concat() raises on an empty list; report the situation instead.
            print("There is no evaluation data to aggregate.")
            self.mean_data = None
            self.std_data = None
            return

        concatenated = pd.concat(per_run)
        # Aggregate numeric columns only, so that text columns in a log cannot break mean()/std().
        value_columns = [column for column in concatenated.select_dtypes(include='number').columns
                         if column != 'Generation']
        # Group on the Generation column only. Passing `level=0` as well (as was done before) makes pandas
        # group on the row index, which misaligns runs whose rows do not cover the same generations.
        grouped = concatenated.groupby('Generation', as_index=False)[value_columns]
        self.mean_data = grouped.mean()
        self.std_data = grouped.std()

    def plot_reward_timestep(self):
        """Plot the mean evaluation reward over timesteps together with a standard deviation band."""
        if self.mean_data is None:
            print("You did not evaluate the results. Please run evaluate() on this experiment.")
        else:
            y_std = None
            # If we only have one training run the standard deviation will be NaN across all values and therefore
            # not be plotted. Use standard deviation from the only evaluation we have
            if self.num_training_runs > 1:
                y_std = self.std_data.Eval_Rew_Mean
            elif 'Eval_Rew_Std' in self.mean_data:
                y_std = self.mean_data.Eval_Rew_Std
            self.plot(self.mean_data.TimestepsSoFar, 'Timesteps',
                      self.mean_data.Eval_Rew_Mean, 'Cummulative reward',
                      y_std)

    def plot(self, x_value, x_label, y_value, y_label, y_std=None):
        """Draw a labelled line plot, optionally with a band of ``y_std`` around the curve, and show it."""
        plt.plot(x_value, y_value)
        # Draw an area around the mean curve which represents the standard deviation
        if y_std is not None:
            # Semi-transparent so that the band does not cover the mean curve.
            plt.fill_between(x_value, y_value - y_std, y_value + y_std, alpha=0.3)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.show()

    def index_main_directory(self, main_directory):
        """
        Walk ``main_directory`` and collect every directory containing both ``log.csv`` and ``config.json``.

        :return: Dict mapping each such directory to the list of file names it contains.
        """
        index = {}
        for root, dirs, files in os.walk(main_directory):
            if 'log.csv' in files and 'config.json' in files:
                index[root] = files
        return index

    @staticmethod
    def _natural_sort_key(name):
        """Sort key that compares digit runs in ``name`` as numbers, so that 'snapshot_2' < 'snapshot_10'."""
        # re.split with a capturing group alternates text (even positions) and digit runs (odd positions).
        return [int(part) if position % 2 else part
                for position, part in enumerate(re.split(r'(\d+)', name))]

    def load_data(self, index):
        """
        Create one ``TrainingRun`` per indexed directory from the files found in it.

        ``.h5`` files are collected as model snapshots, ``log.csv`` and ``evaluation.csv`` are read into
        DataFrames and ``config.json`` is parsed as JSON.

        :param index: Dict as returned by ``index_main_directory``.
        :return: List of ``TrainingRun`` objects.
        """
        training_runs = []
        for sub_dir in index:
            models, log, evaluation, config = [], None, None, None
            for file in index[sub_dir]:
                if file.endswith('.h5'):
                    models.append(file)
                elif file.endswith('log.csv'):
                    log = pd.read_csv(os.path.join(sub_dir, file))
                elif file.endswith('evaluation.csv'):
                    evaluation = pd.read_csv(os.path.join(sub_dir, file))
                elif file.endswith('config.json'):
                    with open(os.path.join(sub_dir, file), encoding='utf-8') as f:
                        config = json.load(f)
            # Numeric-aware ordering: a plain string sort would place 'snapshot_10' before 'snapshot_2' and
            # the last entry would then not be the latest snapshot.
            models.sort(key=self._natural_sort_key)
            training_runs.append(TrainingRun(sub_dir, log, config, models, evaluation))

        return training_runs

    def visualize(self, force=False):
        """Record (or return the cached) video of the last training run; returns None if there are no runs."""
        if len(self.training_runs) > 0:
            video = self.training_runs[-1].visualize(force=force)
            return video

In [4]:
# Remove pandas' display limits so that DataFrames are rendered with all columns and all rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [5]:
# Root directory that Experiment scans for training runs (sub directories holding log.csv and config.json).
# NOTE(review): hard-coded absolute path — adjust to the location of your results.
main_directory = '/tmp/test'

In [29]:
# Index main_directory and load the log, config, snapshot list and any saved evaluation of every run found.
e = Experiment(main_directory)

In [30]:
# Evaluate all training runs (runs that already have merged evaluation data are not re-evaluated) and write
# each newly computed evaluation to evaluation.csv in the run's directory.
e.evaluate(save=True)

In [31]:
# Record a rollout of the last indexed snapshot of the last training run and keep the resulting Video widget.
video = e.visualize()

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Instructions for updating:
Colocations handled automatically by placer.


In [35]:
# File name of the video that visualize() picked up for the last training run.
e.training_runs[-1].video_file

'openaigym.video.0.3113.video000000.mp4'

In [None]:
# Plot the evaluation reward, averaged over the training runs, against the number of timesteps.
e.plot_reward_timestep()

In [None]:
#%%javascript
#IPython.OutputArea.auto_scroll_threshold = 9999;

In [None]:
from IPython.display import display
from IPython.display import JSON
for (log, config, _) in data:
    print(json.dumps(config, indent=4))
    #display(log)
    %matplotlib inline

    plt.plot(log.TimestepsSoFar, log.GenLenMean)
    plt.xlabel('Timesteps')
    plt.ylabel('Cummulative reward')
    plt.show()

In [None]:
# NOTE(review): `model_file_paths`, `save_directory`, `config_file` and `visualize_model` are not defined in any
# cell visible in this notebook, so this cell presumably predates the TrainingRun/Experiment classes —
# confirm whether it is still needed.
model_file_path = model_file_paths[-1]
video_file = visualize_model(save_directory, model_file_path, config_file)

# Only build the Video widget when a video file was returned.
if video_file is not None:
    video = Video.from_file(video_file)