Create summary of the requested runs.

In [1]:
import os
import json
import yaml
import numpy as np
from pathlib import Path
from tensorboard.backend.event_processing.event_file_loader import EventFileLoader
from collections import OrderedDict

In [9]:
# Inclusive range of run ids that should be part of the summary.
from_run, to_run = 6179, 6406

In [3]:
def get_run_dict(paths_dict: dict) -> dict:
    """Index the files and folders belonging to each run by numeric run id.

    Every directory in ``paths_dict`` except the one stored under
    ``"working_dir"`` is listed. The run id of an entry is the integer at the
    start of its name: the part before the first ``_`` if the name contains an
    underscore (e.g. ``6181_stats.json``), otherwise the part before the first
    ``.``. Entries whose prefix is not an integer (hidden files, READMEs, ...)
    are not run artefacts and are skipped.

    Args:
        paths_dict: Maps a directory key (e.g. ``"stats_dir"``) to the
            ``pathlib.Path`` of that directory.

    Returns:
        ``{run_id: {directory_key: path_of_entry}}``. If one directory holds
        several entries with the same run id, the one listed last wins.

    Raises:
        OSError: If one of the directories cannot be listed.
    """
    result: dict = {}

    for key, directory in paths_dict.items():
        if key == "working_dir":
            continue

        for entry in os.listdir(directory):
            separator = "_" if "_" in entry else "."
            try:
                number = int(entry.split(separator)[0])
            except ValueError:
                # Name does not start with a run id, so it is not a run artefact.
                continue

            result.setdefault(number, {})[key] = directory / entry

    return result

In [4]:
def get_result_overview(path: Path, unity: dict, last_n: int = 100) -> dict:
    """Summarise the last logged rewards and episode lengths of one run.

    The alphabetically first ``events.out.tfevents.*`` file inside
    ``path / "RollerAgent"`` is read. The statistics are computed over the
    last ``last_n`` values of the ``Environment/Cumulative Reward`` and
    ``Environment/Episode Length`` tags.

    Args:
        path: Result folder of the run (the parent of the ``RollerAgent`` folder).
        unity: Unity configuration of the run; ``unity["maxStep"]`` is used to
            normalise the mean reward.
        last_n: Number of most recent logged values to evaluate. Defaults to 100.

    Returns:
        A dict with the keys ``"mean_reward"``, ``"std_mean_reward"`` (rounded
        to 5 decimals), ``"episode_length"`` and ``"normalised_reward"``
        (mean reward divided by ``maxStep``). An empty dict is returned if no
        events file exists or if it holds no cumulative reward entries.

    Raises:
        ValueError: If ``last_n`` is smaller than 1.
        KeyError: If ``unity`` has no ``"maxStep"`` entry.
    """
    if last_n < 1:
        # A slice of [-0:] would silently select the whole history.
        raise ValueError("last_n must be at least 1")

    # Get the tfevents file associated with the current run.
    result_folder = Path(path) / "RollerAgent"
    event_files = (
        sorted(result_folder.glob("events.out.tfevents.*"))
        if result_folder.is_dir()
        else []
    )
    if not event_files:
        return {}

    cumulative_rewards = []
    ep_length = []
    for event in EventFileLoader(str(event_files[0])).Load():
        for value in event.summary.value:
            if value.tag == "Environment/Cumulative Reward":
                cumulative_rewards.append(value.tensor.float_val[0])
            elif value.tag == "Environment/Episode Length":
                ep_length.append(value.tensor.float_val[0])

    if not cumulative_rewards:
        # Nothing was logged: report "no result" instead of NaN statistics.
        return {}

    rewards_of_interest = cumulative_rewards[-last_n:]
    mean_reward = np.mean(rewards_of_interest)
    return {
        "mean_reward": mean_reward,
        "std_mean_reward": np.round(np.std(rewards_of_interest), 5),
        "episode_length": np.mean(ep_length[-last_n:]) if ep_length else float("nan"),
        "normalised_reward": mean_reward / unity["maxStep"],
    }

In [5]:
# Enter paths for plot creation and get absolute paths.
# The project root can be overridden with the BASIC_RL_ENV_DIR environment
# variable so the notebook also runs on other machines; without it the
# original location is used.
paths = {
    "working_dir": Path(
        os.environ.get(
            "BASIC_RL_ENV_DIR",
            "C:/Users/max.muehlefeldt/Documents/GitHub/unity-machine-learning/python/basic_rl_env",
        )
    ).absolute(),
}

# All other directories are sub-folders of the working directory.
paths.update(
    {
        key: paths["working_dir"] / folder
        for key, folder in {
            "results_dir": "results/",
            "results_archive_dir": "results_archive/",
            "stats_dir": "stats/",
            "summaries_dir": "summaries/",
            "configs_dir": "configs/",
            "unity_configs_dir": "unity_configs/",
        }.items()
    }
)

paths

{'working_dir': WindowsPath('C:/Users/max.muehlefeldt/Documents/GitHub/unity-machine-learning/python/basic_rl_env'),
 'results_dir': WindowsPath('C:/Users/max.muehlefeldt/Documents/GitHub/unity-machine-learning/python/basic_rl_env/results'),
 'results_archive_dir': WindowsPath('C:/Users/max.muehlefeldt/Documents/GitHub/unity-machine-learning/python/basic_rl_env/results_archive'),
 'stats_dir': WindowsPath('C:/Users/max.muehlefeldt/Documents/GitHub/unity-machine-learning/python/basic_rl_env/stats'),
 'summaries_dir': WindowsPath('C:/Users/max.muehlefeldt/Documents/GitHub/unity-machine-learning/python/basic_rl_env/summaries'),
 'configs_dir': WindowsPath('C:/Users/max.muehlefeldt/Documents/GitHub/unity-machine-learning/python/basic_rl_env/configs'),
 'unity_configs_dir': WindowsPath('C:/Users/max.muehlefeldt/Documents/GitHub/unity-machine-learning/python/basic_rl_env/unity_configs')}

In [10]:
# Index every run found on disk, then keep the ids inside the requested range
# (both bounds inclusive).
run_dict = get_run_dict(paths)

selected_ids = list(
    filter(lambda run_id: from_run <= run_id <= to_run, run_dict)
)
selected_ids




[6181,
 6190,
 6191,
 6195,
 6202,
 6230,
 6235,
 6239,
 6241,
 6245,
 6247,
 6249,
 6251,
 6268,
 6271,
 6276,
 6278,
 6286,
 6291,
 6293,
 6297,
 6299,
 6303,
 6306,
 6309,
 6323,
 6326,
 6328,
 6330,
 6339,
 6344,
 6345,
 6351,
 6353,
 6363,
 6367,
 6371,
 6373,
 6375,
 6378,
 6387,
 6397,
 6398,
 6400,
 6402,
 6405,
 6406,
 6183,
 6203]

In [11]:
# Collect everything that is available for each selected run: exported stats,
# Unity config, training config and the reward overview from the tfevents file.
selected_ids_dict = {}
for run_id in selected_ids:
    run_paths = run_dict[run_id]
    run_info = {}

    if "stats_dir" in run_paths:
        with open(run_paths["stats_dir"]) as json_file:
            run_info["stats"] = json.load(json_file)

    if "unity_configs_dir" in run_paths:
        with open(run_paths["unity_configs_dir"]) as json_file:
            run_info["unity_config"] = json.load(json_file)

    if "configs_dir" in run_paths:
        with open(run_paths["configs_dir"]) as yaml_file:
            run_info["training_config"] = yaml.safe_load(yaml_file)

    # The reward overview needs maxStep from the Unity config, so it can only
    # be computed for runs that have both a result folder and a Unity config.
    if "results_dir" in run_paths and "unity_config" in run_info:
        result = get_result_overview(run_paths["results_dir"], run_info["unity_config"])
        # An empty overview means no usable events data; leave "result" unset
        # so the run is recognised as incomplete.
        if result:
            run_info["result"] = result

    selected_ids_dict[run_id] = run_info


selected_ids_dict

{6181: {'stats': {'sameRoom': 14411,
   'agentInRoomID0': 14237,
   'agentInRoomID1': 14532,
   'targetInRoomID0': 14457,
   'targetInRoomID1': 14312,
   'episodeCount': 28769,
   'sensorCount': 32,
   'runId': 6181,
   'targetRoomIndex0': 14457,
   'targetRoomIndex1': 14312},
  'unity_config': {'sensorCount': 32,
   'useDecoy': False,
   'createWall': True,
   'doorWidth': 4.0,
   'randomWallPosition': True,
   'randomDoorPosition': True,
   'targetAlwaysInOtherRoomFromAgent': False,
   'targetFixedPosition': False,
   'maxStep': 1000,
   'stepPenalty': -0.0005,
   'runId': 6181,
   'statsExportPath': 'C:\\Users\\max.muehlefeldt\\Documents\\GitHub\\unity-machine-learning\\python\\basic_rl_env\\stats\\6181_stats.json'},
  'training_config': {'behaviors': {'RollerAgent': {'hyperparameters': {'batch_size': 16,
      'beta': '1e-2',
      'buffer_size': 51200,
      'epsilon': 0.2,
      'lambd': 0.95,
      'learning_rate': '1e-3',
      'learning_rate_schedule': 'linear',
      'num_epo

In [23]:
# selected_ids_dict[6183]  # disabled lookup of a single run; the output below shows run 6183

{'unity_config': {'sensorCount': 32,
  'useDecoy': False,
  'createWall': True,
  'doorWidth': 4.0,
  'randomWallPosition': True,
  'randomDoorPosition': True,
  'targetAlwaysInOtherRoomFromAgent': False,
  'targetFixedPosition': False,
  'maxStep': 1000,
  'stepPenalty': -0.0005,
  'runId': 6183,
  'statsExportPath': 'C:\\Users\\max.muehlefeldt\\Documents\\GitHub\\unity-machine-learning\\python\\basic_rl_env\\stats\\6183_stats.json'},
 'training_config': {'behaviors': {'RollerAgent': {'hyperparameters': {'batch_size': 16,
     'beta': '1e-2',
     'buffer_size': 51200,
     'epsilon': 0.2,
     'lambd': 0.95,
     'learning_rate': '1e-3',
     'learning_rate_schedule': 'linear',
     'num_epoch': 3},
    'keep_checkpoints': 5,
    'max_steps': '5e6',
    'network_settings': {'conditioning_type': 'none',
     'hidden_units': 356,
     'memory': {'memory_size': 16, 'sequence_length': 32},
     'normalize': False,
     'num_layers': 1},
    'reward_signals': {'extrinsic': {'gamma': 0.99,

In [26]:
# Remove ids from the dict if no usable result is saved for them. A missing
# "result" entry and an empty one are both treated as "no result", because the
# ranking step needs result["mean_reward"].
problem_ids = [
    run_id
    for run_id, run_info in selected_ids_dict.items()
    if not run_info.get("result")
]

for run_id in problem_ids:
    selected_ids_dict.pop(run_id)


In [27]:
# Rank the runs by their mean reward, best run first.
def _mean_reward_of(item):
    """Return the mean reward stored in a ``(run_id, run_info)`` pair."""
    _, run_info = item
    return run_info["result"]["mean_reward"]


ranked_runs = sorted(selected_ids_dict.items(), key=_mean_reward_of, reverse=True)
sorted_dict = OrderedDict(ranked_runs)
sorted_dict

OrderedDict([(6202,
              {'stats': {'sameRoom': 14916,
                'agentInRoomID0': 14919,
                'agentInRoomID1': 14909,
                'targetInRoomID0': 14927,
                'targetInRoomID1': 14901,
                'episodeCount': 29828,
                'sensorCount': 32,
                'runId': 6202,
                'targetRoomIndex0': 14927,
                'targetRoomIndex1': 14901},
               'unity_config': {'sensorCount': 32,
                'useDecoy': False,
                'createWall': True,
                'doorWidth': 4.0,
                'randomWallPosition': True,
                'randomDoorPosition': True,
                'targetAlwaysInOtherRoomFromAgent': False,
                'targetFixedPosition': False,
                'maxStep': 1000,
                'stepPenalty': -0.0005,
                'runId': 6202,
                'statsExportPath': 'C:\\Users\\max.muehlefeldt\\Documents\\GitHub\\unity-machine-learning\\python\\basic_rl_e

In [28]:
# Inspect the full record of run 6202, the first entry of sorted_dict in the output above.
sorted_dict[6202]

{'stats': {'sameRoom': 14916,
  'agentInRoomID0': 14919,
  'agentInRoomID1': 14909,
  'targetInRoomID0': 14927,
  'targetInRoomID1': 14901,
  'episodeCount': 29828,
  'sensorCount': 32,
  'runId': 6202,
  'targetRoomIndex0': 14927,
  'targetRoomIndex1': 14901},
 'unity_config': {'sensorCount': 32,
  'useDecoy': False,
  'createWall': True,
  'doorWidth': 4.0,
  'randomWallPosition': True,
  'randomDoorPosition': True,
  'targetAlwaysInOtherRoomFromAgent': False,
  'targetFixedPosition': False,
  'maxStep': 1000,
  'stepPenalty': -0.0005,
  'runId': 6202,
  'statsExportPath': 'C:\\Users\\max.muehlefeldt\\Documents\\GitHub\\unity-machine-learning\\python\\basic_rl_env\\stats\\6202_stats.json'},
 'training_config': {'behaviors': {'RollerAgent': {'hyperparameters': {'batch_size': 16,
     'beta': '1e-2',
     'buffer_size': 51200,
     'epsilon': 0.2,
     'lambd': 0.95,
     'learning_rate': '1e-3',
     'learning_rate_schedule': 'linear',
     'num_epoch': 3},
    'keep_checkpoints': 5,


In [None]:
# Average the normalised reward over all runs that are left in the ranking.
all_normalised_reward = [
    run_info["result"]["normalised_reward"] for run_info in sorted_dict.values()
]

np.mean(all_normalised_reward)

-0.001524721241281006