In [8]:
import json
import os
import warnings

import pandas as pd
from IPython.display import display, HTML


def display_results_table(data):
  """
  Display the results data as an HTML table sorted by Best Episode Reward.

  Args:
    data: Anything ``pd.DataFrame`` accepts; in this file it is the list of
      row dicts returned by ``load_results``.

  Rows are ordered from the highest to the lowest "Best Episode Reward".
  Float values are rendered with two decimals and thousands separators.
  When ``data`` yields an empty frame a short notice is shown instead of a
  table.

  Raises:
    KeyError: if ``data`` is non-empty but has no "Best Episode Reward" column.
  """
  df = pd.DataFrame(data)

  # An empty frame has no columns, so sorting by name would raise KeyError.
  if df.empty:
    display(HTML("<p>No results to display.</p>"))
    return

  df = df.sort_values(by="Best Episode Reward", ascending=False)

  # Pass the float formatter to this one rendering call rather than setting
  # (and afterwards resetting) the global pandas display option: that would
  # discard any format the user configured and would leak the change if
  # rendering raised.
  table_html = df.to_html(
      index=False,
      escape=False,
      border=1,
      float_format="{:,.2f}".format,
  )

  # Display the table in Jupyter Notebook
  display(HTML(table_html))


def _read_json(path):
  """Parse and return the JSON document stored at ``path``."""
  with open(path, "r", encoding="utf-8") as f:
    return json.load(f)


def load_results(results_dir="results"):
  """
  Load results from all subdirectories in the specified results directory.

  Each subdirectory is expected to contain ``config.json``, ``results.json``
  and ``best_episode.json``. A subdirectory is skipped, with a warning naming
  it, when one of those files is missing, is not valid JSON, or when
  ``config.json`` lacks the required "model" or "episodes" key. Entries of
  ``results_dir`` that are not directories are ignored.

  Args:
    results_dir: Directory whose immediate subdirectories hold one run each.

  Returns:
    A list with one dict per successfully loaded run, keyed by the column
    names used in the results table.

  Raises:
    FileNotFoundError: if ``results_dir`` itself does not exist.
  """
  data = []
  # Sorted so the row order does not depend on the filesystem's listing order.
  for folder in sorted(os.listdir(results_dir)):
    folder_path = os.path.join(results_dir, folder)
    if not os.path.isdir(folder_path):
      continue

    try:
      config = _read_json(os.path.join(folder_path, "config.json"))
      results = _read_json(os.path.join(folder_path, "results.json"))
      best_episode = _read_json(os.path.join(folder_path, "best_episode.json"))

      row = {
          "Model": config["model"].upper(),
          "Params": config.get("model_params", ""),
          "Environment": config.get("environment", "N/A"),
          "Episodes": config["episodes"],
          "Average Episode Time": round(results.get("average_time", 0), 2),
          "Reward Strategy": config.get("reward_strategy", "default"),
          "Average Reward": results.get("average_reward", 0),
          "Reward Variance": results.get("variance_in_rewards", 0),
          "Best Episode Reward": best_episode.get("reward", 0),
          "Results Folder": folder_path,
      }
    except (FileNotFoundError, KeyError, json.JSONDecodeError) as e:
      # Best effort: one incomplete or corrupt run must not hide the others,
      # but say which folder was dropped and why.
      warnings.warn(f"Skipping {folder_path}: {e!r}", stacklevel=2)
      continue

    data.append(row)
  return data


# Collect every run stored under ./results, then render the comparison table.
results_data = load_results(results_dir="results")
display_results_table(data=results_data)

Model,Params,Environment,Episodes,Average Episode Time,Reward Strategy,Average Reward,Reward Variance,Best Episode Reward,Results Folder
PPO,,LunarLanderContinuous-v3,20,3.12,default,-304.83,72935.14,475.8,results/PPO-20241124-212633
PPO,,LunarLanderContinuous-v3,20,3.21,default,-145.74,87340.07,473.11,results/PPO-20241124-191703
PPO,,LunarLanderContinuous-v3,100,8.12,default,27.84,74708.17,426.36,results/ppo-20241124-185755
PPO,,LunarLanderContinuous-v3,20,10.68,default,-712.16,372336.1,425.11,results/PPO-20241124-212955
PPO,,LunarLanderContinuous-v3,20,2.71,default,-319.63,69819.79,396.92,results/PPO-20241124-191118
PPO,,LunarLanderContinuous-v3,20,3.14,default,-176.94,52596.81,382.33,results/PPO-20241124-195933
PPO,,LunarLanderContinuous-v3,20,2.77,default,-450.14,63314.19,370.01,results/PPO-20241124-195417
PPO,,LunarLanderContinuous-v3,20,2.71,default,-518.22,112864.47,362.37,results/PPO-20241124-212735
SAC,,LunarLanderContinuous-v3,100,4.85,default,-164.48,19523.0,253.31,results/sac-20241124-185755
PPO,,LunarLanderContinuous-v3,20,4.28,default,-796.97,351372.56,234.21,results/PPO-20241124-212830
